diff --git a/Array.cc b/Array.cc
index 1c703dd5..62d56a1c 100644
--- a/Array.cc
+++ b/Array.cc
@@ -25,23 +25,23 @@ int main()
 {
   array3<double> A(n,m,p);
   double sum=0.0;
-        
+
 // Sequential access:
   int size=A.Size();
   for(int i=0; i < size; i++) A(i)=i;
-        
+
 // Random access:
   for(int i=0; i < n; i++) {
-                
+
 // The following statements are equivalent, but the first one optimizes better.
     array2<double> Ai=A[i];
 //    array2<double> Ai(m,p); Ai=A[i]; // This does an extra memory copy.
-                
+
     for(int j=0; j < m; j++) {
 //      array1<double> Aij=Ai[j];
 // For 1D arrays: many compilers optimize array1<>::opt better than array1<>.
       vector Aij=Ai[j];
-      
+
       for(int k=0; k < p; k++) {
 // The following statements are equivalent, but the first one optimizes better.
         sum=sum+Aij[k];
@@ -49,43 +49,43 @@ int main()
       }
     }
   }
-        
+
   cout << sum << endl;
-        
-  f(A); 
-  g(A); 
-        
+
+  f(A);
+  g(A);
+
   vector x;
   Allocate(x,1);
   x[0]=1.0;
   cout << h<double>(x) << endl;
 
   cout << endl;
-        
+
 // Arrays with offsets:
-        
+
   const int offx=-1;
   const int offy=-1;
-        
+
   Array1<double> B(n,offx); // B(offx)...B(n-1+offx)
   Array2<double> C(n,m,offx,offy);
   Array1<double> D(5);      // Functionally equivalent to array1<double> D(n);
-        
+
   B=1.0;
   C=2.0;
   D=3.0;
 
   for(int i=offx; i < n+offx; i++) cout << B[i] << endl;
-        
+
   cout << endl;
-        
+
   for(int i=offx; i < n+offx; i++) {
     Vector Ci=C[i];
-    for(int j=offy; j < m+offy; j++) 
+    for(int j=offy; j < m+offy; j++)
       cout << Ci[j] << endl;
   }
 
   cout << D << endl;
-  
+
   return 0;
 }
diff --git a/Array.h b/Array.h
index 37997da5..ace4c68e 100644
--- a/Array.h
+++ b/Array.h
@@ -68,13 +68,13 @@ namespace Array {
 inline std::ostream& _newl(std::ostream& s) {s << '\n'; return s;}
 
 inline void ArrayExit(const char *x);
-  
+
 #ifndef __ExternalArrayExit
 inline void ArrayExit(const char *x)
 {
   std::cerr << _newl << "ERROR: " << x << "." << std::endl;
   exit(1);
-} 
+}
 #endif
 
 #ifndef __fftwpp_h__
@@ -106,9 +106,9 @@ inline void newAlign(T *&v, size_t len, size_t align)
   const char *nomem="Memory limits exceeded";
 #ifdef HAVE_POSIX_MEMALIGN
   int rc=posix_memalign(&mem,align,len*sizeof(T));
-#else  
+#else
   int rc=posix_memalign0(&mem,align,len*sizeof(T));
-#endif  
+#endif
   if(rc == EINVAL) Array::ArrayExit(invalid);
   if(rc == ENOMEM) Array::ArrayExit(nomem);
   v=(T *) mem;
@@ -124,7 +124,7 @@ inline void deleteAlign(T *v, size_t len)
   free(v);
 #else
   free0(v);
-#endif  
+#endif
 }
 
 #endif
@@ -140,7 +140,7 @@ class array1 {
   virtual unsigned int Size() const {return size;}
   void CheckSize() const {
     if(!test(allocated) && size == 0)
-      ArrayExit("Operation attempted on unallocated array"); 
+      ArrayExit("Operation attempted on unallocated array");
   }
   void CheckEqual(int a, int b, unsigned int dim, unsigned int m) const {
     if(a != b) {
@@ -152,7 +152,7 @@ class array1 {
       ArrayExit(s.c_str());
     }
   }
-        
+
   int test(int flag) const {return state & flag;}
   void clear(int flag) const {state &= ~flag;}
   void set(int flag) const {state |= flag;}
@@ -187,17 +187,17 @@ class array1 {
   void CheckActivate(size_t align=0) {
     __checkActivate(1,align);
   }
-  
+
   void Allocate(unsigned int nx0, size_t align=0) {
     Dimension(nx0);
     CheckActivate(align);
   }
-  
+
   void Reallocate(unsigned int nx0, size_t align=0) {
     Deallocate();
     Allocate(nx0,align);
   }
-  
+
   array1() : v(NULL), size(0), state(unallocated) {}
   array1(const void *) : size(0), state(unallocated) {}
   array1(unsigned int nx0, size_t align=0) : state(unallocated) {
@@ -209,11 +209,11 @@ class array1 {
                                state(A.test(temporary)) {}
 
   virtual ~array1() {Deallocate();}
-        
+
   void Freeze() {state=unallocated;}
   void Hold() {if(test(allocated)) {state=temporary;}}
   void Purge() const {if(test(temporary)) {Deallocate(); state=unallocated;}}
-        
+
   virtual void Check(int i, int n, unsigned int dim, unsigned int m,
                      int o=0) const {
     if(i < 0 || i >= n) {
@@ -231,22 +231,22 @@ class array1 {
       ArrayExit(s.c_str());
     }
   }
-        
+
   unsigned int Nx() const {return size;}
-  
+
 #ifdef NDEBUG
   typedef T *opt;
 #else
   typedef array1<T> opt;
 #endif
-  
+
   T& operator [] (int ix) const {__check(ix,size,1,1); return v[ix];}
   T& operator () (int ix) const {__check(ix,size,1,1); return v[ix];}
   T* operator () () const {return v;}
   operator T* () const {return v;}
-        
+
   array1<T> operator + (int i) const {return array1<T>(size-i,v+i);}
-        
+
   void Load(T a) const {
     __checkSize();
     for(unsigned int i=0; i < size; i++) v[i]=a;
@@ -260,25 +260,25 @@ class array1 {
   void Set(T *a) {v=a; clear(allocated);}
   T Min() {
     if(size == 0)
-      ArrayExit("Cannot take minimum of empty array"); 
+      ArrayExit("Cannot take minimum of empty array");
     T min=v[0];
     for(unsigned int i=1; i < size; i++) if(v[i] < min) min=v[i];
     return min;
   }
   T Max() {
     if(size == 0)
-      ArrayExit("Cannot take maximum of empty array"); 
+      ArrayExit("Cannot take maximum of empty array");
     T max=v[0];
     for(unsigned int i=1; i < size; i++) if(v[i] > max) max=v[i];
     return max;
   }
-  
+
   std::istream& Input (std::istream &s) const {
     __checkSize();
     for(unsigned int i=0; i < size; i++) s >> v[i];
     return s;
   }
-        
+
   array1<T>& operator = (T a) {Load(a); return *this;}
   array1<T>& operator = (const T *a) {Load(a); return *this;}
   array1<T>& operator = (const array1<T>& A) {
@@ -290,7 +290,7 @@ class array1 {
     A.Purge();
     return *this;
   }
-        
+
   array1<T>& operator += (const array1<T>& A) {
     __checkSize();
     for(unsigned int i=0; i < size; i++) v[i] += A(i);
@@ -311,7 +311,7 @@ class array1 {
     for(unsigned int i=0; i < size; i++) v[i] /= A(i);
     return *this;
   }
-        
+
   array1<T>& operator += (T a) {
     __checkSize();
     for(unsigned int i=0; i < size; i++) v[i] += a;
@@ -333,7 +333,7 @@ class array1 {
     for(unsigned int i=0; i < size; i++) v[i] *= ainv;
     return *this;
   }
-        
+
   double L1() const {
     __checkSize();
     double norm=0.0;
@@ -368,7 +368,7 @@ class array1 {
     }
     return norm;
   }
-#endif  
+#endif
 };
 
 template<class T>
@@ -379,7 +379,7 @@ void swaparray(T& A, T& B)
   A.Dimension(B);
   B.Dimension(C);
 }
-  
+
 template<class T>
 void leftshiftarray(T& A, T& B, T& C)
 {
@@ -389,7 +389,7 @@ void leftshiftarray(T& A, T& B, T& C)
   B.Dimension(C);
   C.Dimension(D);
 }
-  
+
 template<class T>
 void rightshiftarray(T& A, T& B, T& C)
 {
@@ -399,7 +399,7 @@ void rightshiftarray(T& A, T& B, T& C)
   B.Dimension(A);
   A.Dimension(D);
 }
-  
+
 template<class T>
 std::ostream& operator << (std::ostream& s, const array1<T>& A)
 {
@@ -423,7 +423,7 @@ class array2 : public array1<T> {
   unsigned int ny;
 public:
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0) {
     nx=nx0; ny=ny0;
     this->size=nx*ny;
@@ -433,19 +433,19 @@ class array2 : public array1<T> {
     this->v=v0;
     this->clear(this->allocated);
   }
-  void Dimension(const array1<T> &A) {ArrayExit("Operation not implemented");} 
-  
+  void Dimension(const array1<T> &A) {ArrayExit("Operation not implemented");}
+
   void Allocate(unsigned int nx0, unsigned int ny0, size_t align=0) {
     Dimension(nx0,ny0);
     __checkActivate(2,align);
   }
-        
+
   array2() : nx(0), ny(0) {}
   array2(unsigned int nx0, unsigned int ny0, size_t align=0) {
     Allocate(nx0,ny0,align);
   }
   array2(unsigned int nx0, unsigned int ny0, T *v0) {Dimension(nx0,ny0,v0);}
-        
+
   unsigned int Nx() const {return nx;}
   unsigned int Ny() const {return ny;}
 
@@ -469,7 +469,7 @@ class array2 : public array1<T> {
     return this->v[i];
   }
   T* operator () () const {return this->v;}
-        
+
   array2<T>& operator = (T a) {this->Load(a); return *this;}
   array2<T>& operator = (T *a) {this->Load(a); return *this;}
   array2<T>& operator = (const array2<T>& A) {
@@ -479,7 +479,7 @@ class array2 : public array1<T> {
     A.Purge();
     return *this;
   }
-        
+
   array2<T>& operator += (const array2<T>& A) {
     __checkSize();
     for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i);
@@ -491,7 +491,7 @@ class array2 : public array1<T> {
     return *this;
   }
   array2<T>& operator *= (const array2<T>& A);
-        
+
   array2<T>& operator += (T a) {
     __checkSize();
     unsigned int inc=ny+1;
@@ -509,7 +509,7 @@ class array2 : public array1<T> {
     for(unsigned int i=0; i < this->size; i++) this->v[i] *= a;
     return *this;
   }
-  
+
   void Identity() {
     this->Load((T) 0);
     __checkSize();
@@ -547,7 +547,7 @@ class array3 : public array1<T> {
   unsigned int nyz;
 public:
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0) {
     nx=nx0; ny=ny0; nz=nz0; nyz=ny*nz;
     this->size=nx*nyz;
@@ -557,13 +557,13 @@ class array3 : public array1<T> {
     this->v=v0;
     this->clear(this->allocated);
   }
-        
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 size_t align=0) {
     Dimension(nx0,ny0,nz0);
     __checkActivate(3,align);
   }
-  
+
   array3() : nx(0), ny(0), nz(0), nyz(0) {}
   array3(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          size_t align=0) {
@@ -572,7 +572,7 @@ class array3 : public array1<T> {
   array3(unsigned int nx0, unsigned int ny0, unsigned int nz0, T *v0) {
     Dimension(nx0,ny0,nz0,v0);
   }
-        
+
   unsigned int Nx() const {return nx;}
   unsigned int Ny() const {return ny;}
   unsigned int Nz() const {return nz;}
@@ -592,7 +592,7 @@ class array3 : public array1<T> {
     return this->v[i];
   }
   T* operator () () const {return this->v;}
-        
+
   array3<T>& operator = (T a) {this->Load(a); return *this;}
   array3<T>& operator = (T *a) {this->Load(a); return *this;}
   array3<T>& operator = (const array3<T>& A) {
@@ -600,10 +600,10 @@ class array3 : public array1<T> {
     __checkEqual(ny,A.Ny(),3,2);
     __checkEqual(nz,A.Nz(),3,3);
     this->Load(A());
-    A.Purge(); 
+    A.Purge();
     return *this;
   }
-        
+
   array3<T>& operator += (array3<T>& A) {
     __checkSize();
     for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i);
@@ -614,7 +614,7 @@ class array3 : public array1<T> {
     for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i);
     return *this;
   }
-        
+
   array3<T>& operator += (T a) {
     __checkSize();
     unsigned int inc=nyz+nz+1;
@@ -664,7 +664,7 @@ class array4 : public array1<T> {
   unsigned int nyzw;
 public:
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                  unsigned int nw0) {
     nx=nx0; ny=ny0; nz=nz0; nw=nw0; nzw=nz*nw; nyzw=ny*nzw;
@@ -676,13 +676,13 @@ class array4 : public array1<T> {
     this->v=v0;
     this->clear(this->allocated);
   }
-        
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 unsigned int nw0, size_t align=0) {
     Dimension(nx0,ny0,nz0,nw0);
     __checkActivate(4,align);
   }
-  
+
   array4() : nx(0), ny(0), nz(0), nw(0), nyz(0), nzw(0), nyzw(0) {}
   array4(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          unsigned int nw0, size_t align=0) {Allocate(nx0,ny0,nz0,nw0,align);}
@@ -712,7 +712,7 @@ class array4 : public array1<T> {
     return this->v[i];
   }
   T* operator () () const {return this->v;}
-        
+
   array4<T>& operator = (T a) {this->Load(a); return *this;}
   array4<T>& operator = (T *a) {this->Load(a); return *this;}
   array4<T>& operator = (const array4<T>& A) {
@@ -724,7 +724,7 @@ class array4 : public array1<T> {
     A.Purge();
     return *this;
   }
-        
+
   array4<T>& operator += (array4<T>& A) {
     __checkSize();
     for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i);
@@ -735,7 +735,7 @@ class array4 : public array1<T> {
     for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i);
     return *this;
   }
-        
+
   array4<T>& operator += (T a) {
     __checkSize();
     unsigned int inc=nyzw+nzw+nw+1;
@@ -765,7 +765,7 @@ std::ostream& operator << (std::ostream& s, const array4<T>& A)
       s << _newl;
     }
     s << _newl;
-  }     
+  }
   s << std::flush;
   return s;
 }
@@ -789,7 +789,7 @@ class array5 : public array1<T> {
   unsigned int nyzwv;
 public:
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                  unsigned int nw0, unsigned int nv0) {
     nx=nx0; ny=ny0; nz=nz0; nw=nw0; nv=nv0; nwv=nw*nv; nzwv=nz*nwv;
@@ -802,13 +802,13 @@ class array5 : public array1<T> {
     this->v=v0;
     this->clear(this->allocated);
   }
-        
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 unsigned int nw0, unsigned int nv0, size_t align=0) {
     Dimension(nx0,ny0,nz0,nw0,nv0);
     __checkActivate(5,align);
   }
-  
+
   array5() : nx(0), ny(0), nz(0), nw(0), nv(0), nwv(0), nzwv(0), nyzwv(0) {}
   array5(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          unsigned int nw0, unsigned int nv0, size_t align=0) {
@@ -842,7 +842,7 @@ class array5 : public array1<T> {
     return this->v[i];
   }
   T* operator () () const {return this->v;}
-        
+
   array5<T>& operator = (T a) {this->Load(a); return *this;}
   array5<T>& operator = (T *a) {this->Load(a); return *this;}
   array5<T>& operator = (const array5<T>& A) {
@@ -855,7 +855,7 @@ class array5 : public array1<T> {
     A.Purge();
     return *this;
   }
-        
+
   array5<T>& operator += (array5<T>& A) {
     __checkSize();
     for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i);
@@ -866,7 +866,7 @@ class array5 : public array1<T> {
     for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i);
     return *this;
   }
-        
+
   array5<T>& operator += (T a) {
     __checkSize();
     unsigned int inc=nyzwv+nzwv+nwv+nv+1;
@@ -899,7 +899,7 @@ std::ostream& operator << (std::ostream& s, const array5<T>& A)
       s << _newl;
     }
     s << _newl;
-  }     
+  }
   s << std::flush;
   return s;
 }
@@ -928,7 +928,7 @@ class Array1 : public array1<T> {
     voff=this->v-ox;
   }
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, int ox0=0) {
     this->size=nx0;
     ox=ox0;
@@ -948,7 +948,7 @@ class Array1 : public array1<T> {
     __checkActivate(1,align);
     Offsets();
   }
-        
+
   void Reallocate(unsigned int nx0, int ox0=0, size_t align=0) {
     this->Deallocate();
     Allocate(nx0,ox0,align);
@@ -969,16 +969,16 @@ class Array1 : public array1<T> {
   typedef T *opt;
 #else
   typedef Array1<T> opt;
-#endif  
-  
+#endif
+
   T& operator [] (int ix) const {__check(ix,this->size,ox,1,1); return voff[ix];}
   T& operator () (int i) const {__check(i,this->size,0,1,1); return this->v[i];}
   T* operator () () const {return this->v;}
   operator T* () const {return this->v;}
-        
+
   Array1<T> operator + (int i) const {return Array1<T>(this->size-i,this->v+i,ox);}
   void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);}
-        
+
   Array1<T>& operator = (T a) {this->Load(a); return *this;}
   Array1<T>& operator = (const T *a) {this->Load(a); return *this;}
   Array1<T>& operator = (const Array1<T>& A) {
@@ -995,7 +995,7 @@ class Array1 : public array1<T> {
     A.Purge();
     return *this;
   }
-  
+
   int Ox() const {return ox;}
 };
 
@@ -1010,7 +1010,7 @@ class Array2 : public array2<T> {
     voff=vtemp-oy;
   }
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, int ox0=0, int oy0=0) {
     this->nx=nx0; this->ny=ny0;
     this->size=this->nx*this->ny;
@@ -1023,7 +1023,7 @@ class Array2 : public array2<T> {
     Dimension(nx0,ny0,ox0,oy0);
     this->clear(this->allocated);
   }
-  
+
   void Allocate(unsigned int nx0, unsigned int ny0, int ox0=0, int oy0=0,
                 size_t align=0) {
     Dimension(nx0,ny0,ox0,oy0);
@@ -1050,7 +1050,7 @@ class Array2 : public array2<T> {
     return Array1<T>(this->ny,vtemp+ix*(int) this->ny,oy);
   }
 #endif
-  
+
   T& operator () (int ix, int iy) const {
     __check(ix,this->nx,ox,2,1);
     __check(iy,this->ny,oy,2,2);
@@ -1062,7 +1062,7 @@ class Array2 : public array2<T> {
   }
   T* operator () () const {return this->v;}
   void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);}
-        
+
   Array2<T>& operator = (T a) {this->Load(a); return *this;}
   Array2<T>& operator = (T *a) {this->Load(a); return *this;}
   Array2<T>& operator = (const Array2<T>& A) {
@@ -1083,10 +1083,10 @@ class Array2 : public array2<T> {
     A.Purge();
     return *this;
   }
-        
+
   int Ox() const {return ox;}
   int Oy() const {return oy;}
-  
+
 };
 
 template<class T>
@@ -1100,7 +1100,7 @@ class Array3 : public array3<T> {
     voff=vtemp-oy*(int) this->nz-oz;
   }
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                  int ox0=0, int oy0=0, int oz0=0) {
     this->nx=nx0; this->ny=ny0; this->nz=nz0; this->nyz=this->ny*this->nz;
@@ -1114,14 +1114,14 @@ class Array3 : public array3<T> {
     Dimension(nx0,ny0,nz0,ox0,oy0,oz0);
     this->clear(this->allocated);
   }
-  
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 int ox0=0, int oy0=0, int oz0=0, size_t align=0) {
     Dimension(nx0,ny0,nz0,ox0,oy0,oz0);
     __checkActivate(3,align);
     Offsets();
   }
-        
+
   Array3() : ox(0), oy(0), oz(0) {}
   Array3(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          int ox0=0, int oy0=0, int oz0=0, size_t align=0) {
@@ -1131,7 +1131,7 @@ class Array3 : public array3<T> {
          int ox0=0, int oy0=0, int oz0=0) {
     Dimension(nx0,ny0,nz0,v0,ox0,oy0,oz0);
   }
-        
+
   Array2<T> operator [] (int ix) const {
     __check(ix,this->nx,ox,3,1);
     return Array2<T>(this->ny,this->nz,vtemp+ix*(int) this->nyz,oy,oz);
@@ -1148,7 +1148,7 @@ class Array3 : public array3<T> {
   }
   T* operator () () const {return this->v;}
   void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);}
-        
+
   Array3<T>& operator = (T a) {this->Load(a); return *this;}
   Array3<T>& operator = (T *a) {this->Load(a); return *this;}
   Array3<T>& operator = (const Array3<T>& A) {
@@ -1159,7 +1159,7 @@ class Array3 : public array3<T> {
     __checkEqual(oy,A.Oy(),3,2);
     __checkEqual(oz,A.Oz(),3,3);
     this->Load(A());
-    A.Purge(); 
+    A.Purge();
     return *this;
   }
   Array3<T>& operator = (const array3<T>& A) {
@@ -1170,10 +1170,10 @@ class Array3 : public array3<T> {
     __checkEqual(oy,0,3,2);
     __checkEqual(oz,0,3,3);
     this->Load(A());
-    A.Purge(); 
+    A.Purge();
     return *this;
   }
-  
+
   int Ox() const {return ox;}
   int Oy() const {return oy;}
   int Oz() const {return oz;}
@@ -1191,9 +1191,9 @@ class Array4 : public array4<T> {
     voff=vtemp-oy*(int) this->nzw-oz*(int) this->nw-ow;
   }
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0,
-                 unsigned int nw0, 
+                 unsigned int nw0,
                  int ox0=0, int oy0=0, int oz0=0, int ow0=0) {
     this->nx=nx0; this->ny=ny0; this->nz=nz0; this->nw=nw0;
     this->nzw=this->nz*this->nw; this->nyzw=this->ny*this->nzw;
@@ -1208,15 +1208,15 @@ class Array4 : public array4<T> {
     Dimension(nx0,ny0,nz0,nw0,ox0,oy0,oz0,ow0);
     this->clear(this->allocated);
   }
-  
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 unsigned int nw0,
                 int ox0=0, int oy0=0, int oz0=0, int ow0=0, size_t align=0) {
     Dimension(nx0,ny0,nz0,nw0,ox0,oy0,oz0,ow0);
-    __checkActivate(4,align); 
+    __checkActivate(4,align);
     Offsets();
   }
-        
+
   Array4() : ox(0), oy(0), oz(0), ow(0) {}
   Array4(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          unsigned int nw0,
@@ -1247,10 +1247,10 @@ class Array4 : public array4<T> {
   }
   T* operator () () const {return this->v;}
   void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);}
-        
+
   Array4<T>& operator = (T a) {this->Load(a); return *this;}
   Array4<T>& operator = (T *a) {this->Load(a); return *this;}
-  
+
   Array4<T>& operator = (const Array4<T>& A) {
     __checkEqual(this->nx,A.Nx(),4,1);
     __checkEqual(this->ny,A.Ny(),4,2);
@@ -1281,7 +1281,7 @@ class Array4 : public array4<T> {
     A.Purge();
     return *this;
   }
-  
+
   int Ox() const {return ox;}
   int Oy() const {return oy;}
   int Oz() const {return oz;}
@@ -1299,7 +1299,7 @@ class Array5 : public array5<T> {
     voff=vtemp-oy*(int) this->nzwv-oz*(int) this->nwv-ow*(int) this->nv-ov;
   }
   using array1<T>::Dimension;
-  
+
   void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                  unsigned int nw0,  unsigned int nv0,
                  int ox0=0, int oy0=0, int oz0=0, int ow0=0, int ov0=0) {
@@ -1317,16 +1317,16 @@ class Array5 : public array5<T> {
     Dimension(nx0,ny0,nz0,nw0,nv0,ox0,oy0,oz0,ow0,ov0);
     this->clear(this->allocated);
   }
-  
+
   void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0,
                 unsigned int nw0, unsigned int nv0,
                 int ox0=0, int oy0=0, int oz0=0, int ow0=0, int ov0=0,
                 size_t align=0) {
     Dimension(nx0,ny0,nz0,nw0,nv0,ox0,oy0,oz0,ow0,ov0);
-    __checkActivate(5,align); 
+    __checkActivate(5,align);
     Offsets();
   }
-        
+
   Array5() : ox(0), oy(0), oz(0), ow(0), ov(0) {}
   Array5(unsigned int nx0, unsigned int ny0, unsigned int nz0,
          unsigned int nw0, unsigned int nv0, int ox0=0, int oy0=0,
@@ -1359,10 +1359,10 @@ class Array5 : public array5<T> {
   }
   T* operator () () const {return this->v;}
   void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);}
-        
+
   Array5<T>& operator = (T a) {this->Load(a); return *this;}
   Array5<T>& operator = (T *a) {this->Load(a); return *this;}
-  
+
   Array5<T>& operator = (const Array5<T>& A) {
     __checkEqual(this->nx,A.Nx(),5,1);
     __checkEqual(this->ny,A.Ny(),5,2);
@@ -1540,13 +1540,13 @@ inline void Allocate(T *&A, unsigned int n, size_t align=0)
 
 template<class T>
 inline void Allocate(array1<T>& A, unsigned int n, size_t align=0)
-{  
+{
   A.Allocate(n,align);
 }
 
 template<class T>
 inline void Allocate(Array1<T>& A, unsigned int n, size_t align=0)
-{  
+{
   A.Allocate(n,align);
 }
 
@@ -1559,7 +1559,7 @@ inline void Allocate(T *&A, unsigned int n, int o, size_t align=0)
 
 template<class T>
 inline void Allocate(Array1<T>& A, unsigned int n, int o, size_t align=0)
-{  
+{
   A.Allocate(n,o,align);
 }
 
@@ -1571,13 +1571,13 @@ inline void Deallocate(T *A)
 
 template<class T>
 inline void Deallocate(array1<T>& A)
-{  
+{
   A.Deallocate();
 }
 
 template<class T>
 inline void Deallocate(Array1<T>& A)
-{  
+{
   A.Deallocate();
 }
 
@@ -1589,32 +1589,32 @@ inline void Deallocate(T *A, int o)
 
 template<class T>
 inline void Deallocate(Array1<T>& A, int)
-{  
+{
   A.Deallocate();
 }
 
 template<class T>
 inline void Reallocate(T *&A, unsigned int n, size_t align=0)
-{  
+{
   if(A) delete [] A;
   Allocate(A,n,align);
 }
 
 template<class T>
 inline void Reallocate(array1<T>& A, unsigned int n)
-{  
+{
   A.Reallocate(n);
 }
 
 template<class T>
 inline void Reallocate(Array1<T>& A, unsigned int n)
-{  
+{
   A.Reallocate(n);
 }
 
 template<class T>
 inline void Reallocate(T *&A, unsigned int n, int o, size_t align=0)
-{  
+{
   if(A) delete [] A;
   Allocate(A,n,align);
   A -= o;
@@ -1622,7 +1622,7 @@ inline void Reallocate(T *&A, unsigned int n, int o, size_t align=0)
 
 template<class T>
 inline void Reallocate(Array1<T>& A, unsigned int n, int o, size_t align=0)
-{  
+{
   A.Reallocate(n,o,align);
 }
 
diff --git a/Complex.h b/Complex.h
index 67edc26a..a56f2513 100644
--- a/Complex.h
+++ b/Complex.h
@@ -1,4 +1,4 @@
-/* 
+/*
    Copyright (C) 1988 Free Software Foundation
    written by Doug Lea (dl@rocky.oswego.edu)
 
@@ -43,65 +43,65 @@ class Complex
   Complex() {}
   Complex(double r, double i=0) : re(r), im(i) {}
   Complex(const Complex& y) : re(y.re), im(y.im) {}
-        
+
   ~Complex() {}
 
   double real() const {return re;}
   double imag() const {return im;}
 
   const Complex& operator = (const Complex& y);
-        
+
   const Complex& operator += (const Complex& y);
   const Complex& operator += (double y);
   const Complex& operator -= (const Complex& y);
   const Complex& operator -= (double y);
   const Complex& operator *= (const Complex& y);
   const Complex& operator *= (double y);
-  const Complex& operator /= (const Complex& y); 
-  const Complex& operator /= (double y); 
-        
+  const Complex& operator /= (const Complex& y);
+  const Complex& operator /= (double y);
+
   void error(char* msg) const;
 };
 
 // inline members
 
-inline const Complex& Complex::operator = (const Complex& y) 
+inline const Complex& Complex::operator = (const Complex& y)
 
-{ 
-  re = y.re; im = y.im; return *this; 
-} 
+{
+  re = y.re; im = y.im; return *this;
+}
 
 inline const Complex& Complex::operator += (const Complex& y)
-{ 
-  re += y.re;  im += y.im; return *this; 
+{
+  re += y.re;  im += y.im; return *this;
 }
 
 inline const Complex& Complex::operator += (double y)
-{ 
-  re += y; return *this; 
+{
+  re += y; return *this;
 }
 
 inline const Complex& Complex::operator -= (const Complex& y)
-{ 
-  re -= y.re;  im -= y.im; return *this; 
+{
+  re -= y.re;  im -= y.im; return *this;
 }
 
 inline const Complex& Complex::operator -= (double y)
-{ 
-  re -= y; return *this; 
+{
+  re -= y; return *this;
 }
 
 inline const Complex& Complex::operator *= (const Complex& y)
-{  
+{
   double r = re * y.re - im * y.im;
-  im = re * y.im + im * y.re; 
-  re = r; 
-  return *this; 
+  im = re * y.im + im * y.re;
+  re = r;
+  return *this;
 }
 
 inline const Complex& Complex::operator *= (double y)
-{  
-  re *= y; im *= y; return *this; 
+{
+  re *= y; im *= y; return *this;
 }
 
 inline const Complex& Complex::operator /= (const Complex& y)
@@ -313,9 +313,9 @@ inline bool isfinite(const Complex& z)
 {
 #ifdef _WIN32
   return _finite(z.re) && _finite(z.im);
-#else  
+#else
   return !(std::isinf(z.re) || std::isnan(z.re) || std::isinf(z.im) || std::isnan(z.im));
-#endif  
+#endif
 }
 
 #endif
diff --git a/align.h b/align.h
index f6f2e94f..fe891deb 100644
--- a/align.h
+++ b/align.h
@@ -18,8 +18,8 @@
 #ifdef __Array_h__
 
 namespace Array {
-static const array1<Complex> NULL1;  
-static const array2<Complex> NULL2;  
+static const array1<Complex> NULL1;
+static const array2<Complex> NULL2;
 static const array3<Complex> NULL3;
 }
 
@@ -64,9 +64,9 @@ inline void newAlign(T *&v, size_t len, size_t align)
   const char *nomem="Memory limits exceeded";
 #ifdef HAVE_POSIX_MEMALIGN
   int rc=posix_memalign(&mem,align,len*sizeof(T));
-#else  
+#else
   int rc=posix_memalign0(&mem,align,len*sizeof(T));
-#endif  
+#endif
   if(rc == EINVAL) std::cerr << invalid << std::endl;
   if(rc == ENOMEM) std::cerr << nomem << std::endl;
   v=(T *) mem;
@@ -81,7 +81,7 @@ inline void deleteAlign(T *v, size_t len)
   free(v);
 #else
   free0(v);
-#endif  
+#endif
 }
 }
 
@@ -115,7 +115,7 @@ inline void deleteAlign(T *p)
   free(p);
 #else
   Array::free0(p);
-#endif  
+#endif
 }
 
 }
diff --git a/cmult-sse2.h b/cmult-sse2.h
index 80595b1c..66858a62 100644
--- a/cmult-sse2.h
+++ b/cmult-sse2.h
@@ -32,42 +32,42 @@ union uvec {
   unsigned u[4];
   Vec v;
 };
-  
+
 extern const union uvec sse2_pm;
 extern const union uvec sse2_mm;
 
 #if defined(__INTEL_COMPILER) || !defined(__GNUC__)
-static inline Vec operator -(const Vec& a) 
+static inline Vec operator -(const Vec& a)
 {
   return _mm_xor_pd(sse2_mm.v,a);
 }
 
-static inline Vec operator +(const Vec& a, const Vec& b) 
+static inline Vec operator +(const Vec& a, const Vec& b)
 {
   return _mm_add_pd(a,b);
 }
 
-static inline Vec operator -(const Vec& a, const Vec& b) 
+static inline Vec operator -(const Vec& a, const Vec& b)
 {
   return _mm_sub_pd(a,b);
 }
 
-static inline Vec operator *(const Vec& a, const Vec& b) 
+static inline Vec operator *(const Vec& a, const Vec& b)
 {
   return _mm_mul_pd(a,b);
 }
 
-static inline void operator +=(Vec& a, const Vec& b) 
+static inline void operator +=(Vec& a, const Vec& b)
 {
   a=_mm_add_pd(a,b);
 }
 
-static inline void operator -=(Vec& a, const Vec& b) 
+static inline void operator -=(Vec& a, const Vec& b)
 {
   a=_mm_sub_pd(a,b);
 }
 
-static inline void operator *=(Vec& a, const Vec& b) 
+static inline void operator *=(Vec& a, const Vec& b)
 {
   a=_mm_mul_pd(a,b);
 }
@@ -113,32 +113,32 @@ class Vec {
 public:
   double x;
   double y;
-  
+
   Vec() {};
   Vec(double x, double y) : x(x), y(y) {};
   Vec(const Vec &v) : x(v.x), y(v.y) {};
   Vec(const Complex &z) : x(z.re), y(z.im) {};
-  
+
   const Vec& operator += (const Vec& v) {
-    x += v.x; 
-    y += v.y; 
+    x += v.x;
+    y += v.y;
     return *this;
   }
-  
+
   const Vec& operator -= (const Vec& v) {
-    x -= v.x; 
-    y -= v.y; 
+    x -= v.x;
+    y -= v.y;
     return *this;
   }
-  
+
   const Vec& operator *= (const Vec& v) {
-    x *= v.x; 
-    y *= v.y; 
+    x *= v.x;
+    y *= v.y;
     return *this;
   }
 };
 
-static inline Vec operator -(const Vec& a) 
+static inline Vec operator -(const Vec& a)
 {
   return Vec(-a.x,-a.y);
 }
diff --git a/convolution.cc b/convolution.cc
index 89ab3018..a4e99ca9 100644
--- a/convolution.cc
+++ b/convolution.cc
@@ -28,10 +28,10 @@ unsigned int BuildZeta(double arg, unsigned int m,
   unsigned int t=m/s;
   if(s*t < m) ++t;
   ZetaH=ComplexAlign(t);
-  
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
   for(unsigned int a=0; a < t; ++a) {
     double theta=s*a*arg;
     ZetaH[a]=Complex(cos(theta),sin(theta));
@@ -39,7 +39,7 @@ unsigned int BuildZeta(double arg, unsigned int m,
   ZetaL=ComplexAlign(s);
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
   for(unsigned int b=0; b < s; ++b) {
     double theta=b*arg;
     ZetaL[b]=Complex(cos(theta),sin(theta));
@@ -55,19 +55,19 @@ unsigned int BuildZeta(unsigned int n, unsigned int m,
 
 void ImplicitConvolution::convolve(Complex **F, multiplier *pmult,
                                    unsigned int i, unsigned int offset)
-{ 
+{
   if(indexsize >= 1) index[indexsize-1]=i;
-  
+
   unsigned int C=max(A,B);
   Complex *P[C];
   for(unsigned int a=0; a < C; ++a)
     P[a]=F[a]+offset;
-  
+
   // Backwards FFT (even indices):
   for(unsigned int a=0; a < A; ++a) {
     BackwardsO->fft(P[a],U[a]);
   }
-  
+
   if(A >= B)
     (*pmult)(U,m,indexsize,index,0,threads); // multiply even indices
 
@@ -82,14 +82,14 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult,
   if(A > B) { // U[A-1] is free
     Complex *W[A];
     W[A-1]=U[A-1];
-    for(unsigned int a=1; a < A; ++a) 
+    for(unsigned int a=1; a < A; ++a)
       W[a-1]=P[a];
 
     for(unsigned int a=A; a-- > 0;) // Loop from A-1 to 0.
       BackwardsO->fft(P[a],W[a]);
-    
+
     (*pmult)(W,m,indexsize,index,1,threads); // multiply odd indices
-    
+
     // Return to original space
     Complex *lastW=W[A-1];
     for(unsigned int b=0; b < B; ++b) {
@@ -98,24 +98,24 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult,
       ForwardsO->fft(U[b],lastW);
       posttransform(Pb,lastW);
     }
-    
+
   } else if(A < B) { // U[B-1] is free
     Complex *W[B];
     W[B-1]=U[B-1];
-    for(unsigned int b=1; b < B; ++b) 
+    for(unsigned int b=1; b < B; ++b)
       W[b-1]=P[b];
 
     for(unsigned int a=A; a-- > 0;) // Loop from A-1 to 0.
       BackwardsO->fft(P[a],W[a]);
-    
+
     (*pmult)(W,m,indexsize,index,1,threads); // multiply odd indices
-    
+
     // Return to original space
     for(unsigned int b=0; b < B; ++b)
       ForwardsO->fft(W[b],P[b]);
-    
+
     (*pmult)(U,m,indexsize,index,0,threads); // multiply even indices
-    
+
     Complex *f0=P[0];
     Complex *u0=U[0];
     Forwards->fft(u0);
@@ -127,7 +127,7 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult,
       ForwardsO->fft(ub,u0);
       posttransform(fb,u0);
     }
-    
+
   } else { // A == B
     // Backwards FFT (odd indices):
     for(unsigned int a=0; a < A; ++a)
@@ -218,7 +218,7 @@ pretransform<pretransform4>(Complex **F, unsigned int k, Vec& Zetak)
 // multiply by root of unity to prepare for inverse FFT for odd modes
 template<class T>
 void ImplicitConvolution::pretransform(Complex **F)
-{  
+{
   PARALLEL(
     for(unsigned int K=0; K < m; K += s) {
       Complex *ZetaL0=ZetaL-K;
@@ -259,7 +259,7 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U)
 {
   Vec Mhalf=LOAD(-0.5);
   Vec HSqrt3=LOAD(hsqrt3);
-  
+
   double Re=0.0, Im=0.0;
 
   unsigned int m1=m-1;
@@ -279,12 +279,12 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U)
     Vec A=ZMULTC(zeta1,UNPACKL(B,Fb)); // Optimize?
     B=ZMULTIC(zeta1,UNPACKH(B,Fb));
     STORE(f1c,CONJ(A+B));
-        
+
     double re=F[c].re;
     Re=2.0*re;
     Im=re+sqrt3*F[c].im;
   }
-  
+
   unsigned int c1=c+1;
   unsigned int d=c1/2;
   unsigned int a=c1/s;
@@ -306,21 +306,21 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U)
       for(unsigned int k=max(1,K); k < stop; ++k) {
         Vec zetak=ZMULT(X,Y,LOAD(ZetaL0+k));
         Vec Zetak=ZMULTC(zetak,Zetac1);
-          
+
         Vec Fa=LOAD(F+k);
         Vec FA=LOAD(fpc1-k);
         Vec FB=LOAD(fmc1+k);
         Vec Fb=LOAD(fm-k);
-          
+
         Vec b=Fb*Mhalf+CONJ(Fa);
         STORE(F+k,Fa+CONJ(Fb));
         Fb *= HSqrt3;
         Vec a=ZMULTC(zetak,UNPACKL(b,Fb));
         b=ZMULTIC(zetak,UNPACKH(b,Fb));
-        
+
         STORE(fmc1+k,CONJ(a+b));
         STORE(U+k,a-b);
-        
+
         b=FB*Mhalf+CONJ(FA);
         STORE(fpc1-k,FA+CONJ(FB));
         FB *= HSqrt3;
@@ -332,7 +332,7 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U)
       }
     }
     );
-    
+
   if(even) {
     F[c]=Re;
     U[c]=Im;
@@ -348,7 +348,7 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w,
   Vec Mhalf=LOAD(-0.5);
   Vec HSqrt3=LOAD(hsqrt3);
 
-  unsigned int m1=m-1;  
+  unsigned int m1=m-1;
   unsigned int c1=c+1;
   unsigned int d=c1/2;
   unsigned int a=c1/s;
@@ -377,7 +377,7 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w,
     F2=ZMULT(Zeta1,LOAD(U+c));
     STORE(f0+c,F0+F1+F2);
   }
-  
+
   unsigned int D=c-d;
   PARALLEL(
     for(unsigned int K=0; K <= D; K += s) {
@@ -393,27 +393,27 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w,
       for(unsigned int k=max(even+1,K); k < stop; ++k) {
         Vec zetak=ZMULT(X,Y,LOAD(ZetaL0+k));
         Vec Zetak=ZMULTC(zetak,Zetac1);
-          
+
         Vec F0=LOAD(F+k)*Ninv;
         Vec F1=ZMULTC(zetak,LOAD(fmc1+k));
         Vec F2=ZMULT(zetak,LOAD(U+k));
         Vec S=F1+F2;
         F2=CONJ(F0+Mhalf*S)-HSqrt3*FLIP(F1-F2);
-            
+
         Vec FA=LOAD(fpc1-k)*Ninv;
         Vec FB=ZMULTC(Zetak,LOAD(fm-k));
         Vec FC=ZMULT(Zetak,LOAD(upc1-k));
         Vec T=FB+FC;
-            
+
         STORE(F+k,F0+S);
         STORE(fpc1-k,FA+T);
         STORE(fmc1+k,CONJ(FA+Mhalf*T)-HSqrt3*FLIP(FB-FC));
         STORE(fm-k,F2);
-      }  
+      }
     }
     );
 
-  
+
   if(d == D+1) {
     unsigned int a=d/s;
     Vec Zeta=Ninv*LOAD(ZetaH+a);
@@ -433,7 +433,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
                                     unsigned int i, unsigned int offset)
 {
   if(indexsize >= 1) index[indexsize-1]=i;
-  
+
   // Set problem-size variables and pointers:
   unsigned int C=max(A,B);
 
@@ -448,8 +448,8 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
     c0[a]=f;
     c1[a]=f+start;
   }
-    
-  if(A != B) { 
+
+  if(A != B) {
     for(unsigned int a=0; a < C-1; ++a) {
       d0[a]=(double *) c0[a+1];
       d1[a]=(double *) c1[a+1];
@@ -477,7 +477,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
   }
 
   // Complex-to-real FFTs and pmults:
-  
+
   double Re[B],Im[B];
 
   // r=-1 (backwards):
@@ -506,7 +506,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
     crO->fft(c0a,d0[a]);
   }
   (*pmult)(d0,m,indexsize,index,0,threads);
-    
+
   for(unsigned int b=0; b < B; ++b) {
     Complex *c0b=c0[b];
     rcO->fft(d0[b],c0b);
@@ -515,7 +515,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
     Re[b]=z.re;
     Im[b]=z.im;
   }
-  
+
   if(even) {
     for(unsigned int a=C; a-- > 0;) { // Loop from C-1 to 0.
       Complex *c1a=c1[a];
@@ -524,7 +524,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
       c1a[1]=tmp;          // r=1, k=1
     }
   }
-  
+
   // r=1:
   for(unsigned int a=A; a-- > 0;) { // Loop from A-1 to 0.
     Complex *c1a=c1[a];
@@ -542,9 +542,9 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult,
       c1b[1]=tmp;    // r=0, k=c
     }
   }
-  
+
   const double ninv=1.0/(3.0*m);
-  
+
   // r=-1 (forwards):
   if(A > B) {
     for(unsigned int b=0; b < B; ++b) {
@@ -594,7 +594,7 @@ void fftpad::expand(Complex *f, Complex *u)
     }
     );
 }
-  
+
 void fftpad::backwards(Complex *f, Complex *u)
 {
   expand(f,u);
@@ -637,11 +637,11 @@ void fft0pad::expand(Complex *f, Complex *u)
   Complex *fm1stride=f+(m-1)*stride;
   for(unsigned int i=0; i < M; ++i)
     u[i]=fm1stride[i];
-    
+
   Vec Mhalf=LOAD(-0.5);
   Vec Mhsqrt3=LOAD(-hsqrt3);
   unsigned int stop=s;
-  
+
   for(unsigned int K=0; K < m; K += s) {
     Complex *ZetaL0=ZetaL-K;
     Vec H=LOAD(ZetaH+K/s);
@@ -700,7 +700,7 @@ void fft0pad::reduce(Complex *f, Complex *u)
   Vec Mhalf=LOAD(-0.5);
   Vec HSqrt3=LOAD(hsqrt3);
   Complex *fm1stride=f+(m-1)*stride;
-  
+
   unsigned int stop=s;
   for(unsigned int K=0; K < m; K += s) {
     Complex *ZetaL0=ZetaL-K;
@@ -724,7 +724,7 @@ void fft0pad::reduce(Complex *f, Complex *u)
     }
     stop=min(stop+s,m);
   }
-    
+
   for(unsigned int i=0; i < M; ++i)
     fm1stride[i]=umstride[i];
 }
@@ -733,7 +733,7 @@ void fft0pad::Forwards0(Complex *f)
 {
   Forwards->fft(f+(m-1)*stride);
 }
-  
+
 void fft0pad::Forwards1(Complex *f, Complex *u)
 {
   Complex *umstride=u+m*stride;
@@ -745,11 +745,11 @@ void fft0pad::Forwards1(Complex *f, Complex *u)
     fm1stride[i]=temp;
   }
 }
-  
+
 void fft0pad::forwards(Complex *f, Complex *u)
 {
-  Forwards0(f);  
-  Forwards1(f,u);  
+  Forwards0(f);
+  Forwards1(f,u);
   Forwards->fft(f);
   Forwards->fft(u);
   reduce(f,u);
@@ -763,7 +763,7 @@ void fft1pad::expand(Complex *f, Complex *u)
     f[i]=fmstride[i]+2.0*Nyquist;
     u[i]=fmstride[i] -= Nyquist;
   }
-    
+
   Vec Mhalf=LOAD(-0.5);
   Vec Mhsqrt3=LOAD(-hsqrt3);
   unsigned int inc=s;
@@ -783,7 +783,7 @@ void fft1pad::expand(Complex *f, Complex *u)
         for(unsigned int i=0; i < M; ++i) {
           Vec Fa=LOAD(fk+i);
           Vec Fb=LOAD(fmk+i);
-        
+
           Vec B=Fa*Mhalf+Fb;
           STORE(fk+i,Fa+Fb);
           Fa *= Mhsqrt3;
@@ -817,7 +817,7 @@ void fft1pad::reduce(Complex *f, Complex *u)
   Vec Ninv=LOAD(ninv);
   Vec Mhalf=LOAD(-0.5);
   Vec HSqrt3=LOAD(hsqrt3);
-  
+
   unsigned int inc=s;
   PARALLEL(
     for(unsigned int K=0; K < m; K += inc) {
@@ -893,7 +893,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C,
         double *ak=a+k;
         STORE(ak,LOAD(ak)*LOAD(b+k)*LOAD(C0+k)+
               LOAD(a1+k)*LOAD(b1+k)*LOAD(C1+k));
-      }  
+      }
       );
 #else
     PARALLEL(
@@ -930,7 +930,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C,
     double *C0=C[0];
     unsigned int stop=twom+offset;
 #ifdef __SSE2__
-    PARALLEL(   
+    PARALLEL(
       for(unsigned int k=offset; k < stop; k += 2) {
         double *p=A+k;
         double *q=B+k;
@@ -940,7 +940,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C,
           sum += LOAD(p+istride)*LOAD(q+istride)*LOAD(C[i]+k);
         }
         STORE(p,sum);
-      }   
+      }
       );
 #else
     PARALLEL(
@@ -965,7 +965,7 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H,
 {
   // 8M-3 of 8M FFTs are out-of-place
   Complex *w=W[0];
-    
+
   unsigned int m1=m+1;
   for(unsigned int i=0; i < M; ++i) {
     Complex *fi=F[i]+offset;
@@ -996,13 +996,13 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H,
           STORE(vi+k,ZMULT(Zetak,Gk));
           STORE(wi+k,ZMULT(Zetak,Hk));
         }
-      }  
+      }
       );
-      
+
     ui[m]=0.0;
     vi[m]=0.0;
     wi[m]=0.0;
-    
+
     if(i+1 < M) {
       cro->fft(ui,(double *) (ui-m1));
       cro->fft(vi,(double *) (vi-m1));
@@ -1012,8 +1012,8 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H,
       cr->fft(vi);
       cr->fft(wi);
     }
-  }   
-    
+  }
+
   mult((double *) v,(double *) u,(double **) W);
   rco->fft((double *) v,u); // v and w are now free
 
@@ -1029,11 +1029,11 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H,
     hi[m]=0.0;
     cro->fft(hi,(double *) gi);
   }
-  
+
   mult((double *) v,(double *) w,(double **) G,2*offset);
   Complex *f=F[0]+offset;
   rco->fft((double *) v,f);
-    
+
   double ninv=0.25/m;
   Vec Ninv=LOAD(ninv);
   PARALLEL(
@@ -1090,27 +1090,27 @@ void ImplicitHFGGConvolution::convolve(Complex *f, Complex *g,
         STORE(u+k,ZMULT(Zetak,Fk));
         STORE(v+k,ZMULT(Zetak,Gk));
       }
-    }  
+    }
     );
 
   u[m]=0.0;
   v[m]=0.0;
-    
+
   cr->fft(u);
   cr->fft(v);
-    
+
   mult((double *) v,(double *) u);
   rco->fft((double *) v,u); // v is now free
 
   g[m]=0.0;
   cro->fft(g,(double *) v);
-  
+
   f[m]=0.0;
   cro->fft(f,(double *) g);
-  
+
   mult((double *) v,(double *) g);
   rco->fft((double *) v,f);
-    
+
   double ninv=0.25/m;
 
   Vec Ninv=LOAD(ninv);
@@ -1139,10 +1139,10 @@ void ImplicitHFFFConvolution::mult(double *a)
       double *p=a+k;
       Vec ak=LOAD(p);
       STORE(p,ak*ak*ak);
-    }        
+    }
     );
 }
-  
+
 void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u)
 {
   PARALLEL(
@@ -1154,9 +1154,9 @@ void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u)
       Vec Y=UNPACKH(CONJ(Zeta),Zeta);
       for(unsigned int k=K; k < stop; ++k)
         STORE(u+k,ZMULT(ZMULT(X,Y,LOAD(ZetaL0+k)),LOAD(f+k)));
-    }  
+    }
     );
-    
+
   u[m]=0.0;
   cr->fft(u);
   mult((double *) u);
@@ -1178,7 +1178,7 @@ void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u)
       for(unsigned int k=K; k < stop; ++k) {
         Complex *p=f+k;
         STORE(p,ZMULTC(ZMULT(X,Y,LOAD(ZetaL0+k)),LOAD(u+k))+Ninv*LOAD(p));
-      } 
+      }
     }
     );
 }
@@ -1189,7 +1189,7 @@ void fft0bipad::backwards(Complex *f, Complex *u)
     f[i]=0.0;
   for(unsigned int i=0; i < M; ++i)
     u[i]=0.0;
-    
+
   unsigned int twom=2*m;
   PARALLEL(
     for(unsigned int K=0; K < twom; K += s) {
@@ -1208,7 +1208,7 @@ void fft0bipad::backwards(Complex *f, Complex *u)
       }
     }
     );
-    
+
   Backwards->fft(f);
   Backwards->fft(u);
 }
@@ -1251,7 +1251,7 @@ void multautocorrelation(Complex **F, unsigned int m,
                          unsigned int r, unsigned int threads)
 {
   Complex* F0=F[0];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1274,7 +1274,7 @@ void multcorrelation(Complex **F, unsigned int m,
 {
   Complex* F0=F[0];
   Complex* F1=F[1];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1293,7 +1293,7 @@ void multcorrelation(Complex **F, unsigned int m,
 
 // Unscramble indices, returning spatial index for remainder r at position j.
 inline unsigned innerindex(unsigned j, int r) {return 2*j+r;}
-  
+
 // This multiplication routine is for binary convolutions and takes two inputs
 // of size m.
 // F[0][j] *= F[1][j];
@@ -1304,7 +1304,7 @@ void multbinary(Complex **F, unsigned int m,
 {
   Complex* F0=F[0];
   Complex* F1=F[1];
-  
+
 #if 0 // Spatial indices are available, if needed.
   size_t n=indexsize;
   for(unsigned int j=0; j < m; ++j) {
@@ -1312,8 +1312,8 @@ void multbinary(Complex **F, unsigned int m,
       cout << index[d] << ",";
     cout << innerindex(j,r) << endl;
   }
-#endif  
-      
+#endif
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1336,7 +1336,7 @@ void multautoconvolution(Complex **F, unsigned int m,
                          unsigned int r, unsigned int threads)
 {
   Complex* F0=F[0];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1357,7 +1357,7 @@ inline unsigned innerindex(unsigned j, int r, unsigned int m) {
   int x=3*j+r;
   return x >= 0 ? x : 3*m-1;
 }
-  
+
 // This multiplication routine is for binary Hermitian convolutions and takes
 // two inputs.
 // F[0][j] *= F[1][j];
@@ -1368,7 +1368,7 @@ void multbinary(double **F, unsigned int m,
 {
   double* F0=F[0];
   double* F1=F[1];
-  
+
 #if 0 // Spatial indices are available, if needed.
   //size_t n=index.size();
   size_t n=indexsize;
@@ -1378,7 +1378,7 @@ void multbinary(double **F, unsigned int m,
     cout << innerindex(j,r,m) << endl;
   }
 #endif
-      
+
 #ifdef __SSE2__
   unsigned int m1=m-1;
   PARALLEL(
@@ -1407,7 +1407,7 @@ void multbinary2(Complex **F, unsigned int m,
   Complex* F1=F[1];
   Complex* F2=F[2];
   Complex* F3=F[3];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1434,7 +1434,7 @@ void multbinary2(double **F, unsigned int m,
   double* F1=F[1];
   double* F2=F[2];
   double* F3=F[3];
-  
+
 #ifdef __SSE2__
   unsigned int m1=m-1;
   PARALLEL(
@@ -1465,7 +1465,7 @@ void multbinary3(Complex **F, unsigned int m,
   Complex* F3=F[3];
   Complex* F4=F[4];
   Complex* F5=F[5];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1487,7 +1487,7 @@ void multbinary3(Complex **F, unsigned int m,
 
 // F[0][j]=F[0][j]*F[4][j]+F[1][j]*F[5][j]+F[2][j]*F[6][j]+F[3][j]*F[7][j];
 void multbinary4(Complex **F, unsigned int m,
-                 const unsigned int indexsize,           
+                 const unsigned int indexsize,
                  const unsigned int *index,
                  unsigned int r, unsigned int threads)
 {
@@ -1499,7 +1499,7 @@ void multbinary4(Complex **F, unsigned int m,
   Complex* F5=F[5];
   Complex* F6=F[6];
   Complex* F7=F[7];
-  
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1543,7 +1543,7 @@ void multbinary8(Complex **F, unsigned int m,
   Complex* F13=F[13];
   Complex* F14=F[14];
   Complex* F15=F[15];
-    
+
 #ifdef __SSE2__
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
@@ -1578,7 +1578,7 @@ void multadvection2(double **F, unsigned int m,
 {
   double* F0=F[0];
   double* F1=F[1];
-  
+
 #ifdef __SSE2__
   unsigned int m1=m-1;
   PARALLEL(
@@ -1604,7 +1604,7 @@ void multadvection2(double **F, unsigned int m,
     F0[j]=v*v-u*u;
     F1[j]=u*v;
   }
-#endif  
+#endif
 }
 
 } // namespace fftwpp
diff --git a/convolution.h b/convolution.h
index 7c75f15d..a1e64d4a 100644
--- a/convolution.h
+++ b/convolution.h
@@ -19,7 +19,7 @@
 #include "fftw++.h"
 #include "cmult-sse2.h"
 #include "transposeoptions.h"
-    
+
 namespace fftwpp {
 
 #ifndef __convolution_h__
@@ -67,28 +67,28 @@ struct convolveOptions {
   convolveOptions(unsigned int nx, unsigned int ny, unsigned int stride2,
                   utils::mpiOptions mpi, bool toplevel=true) :
     nx(nx), ny(ny), stride2(stride2), mpi(mpi), toplevel(toplevel) {}
-    
+
   convolveOptions(unsigned int ny, unsigned int nz,
                   unsigned int stride2, unsigned int stride3,
                   utils::mpiOptions mpi, bool toplevel=true) :
     ny(ny), nz(nz), stride2(stride2), stride3(stride3), mpi(mpi),
     toplevel(toplevel) {}
-  
+
   convolveOptions(bool toplevel=true) : nx(0), ny(0), nz(0),
                                         toplevel(toplevel) {}
 };
-    
+
 static const convolveOptions defaultconvolveOptions;
 
 typedef void multiplier(Complex **, unsigned int m,
                         const unsigned int indexsize,
                         const unsigned int *index,
-                        unsigned int r, unsigned int threads); 
+                        unsigned int r, unsigned int threads);
 typedef void realmultiplier(double **, unsigned int m,
                             const unsigned int indexsize,
                             const unsigned int *index,
-                            unsigned int r, unsigned int threads); 
-  
+                            unsigned int r, unsigned int threads);
+
 // Multipliers for binary convolutions.
 
 multiplier multautoconvolution;
@@ -128,15 +128,15 @@ class ImplicitConvolution : public ThreadBase {
   unsigned int indexsize;
 public:
   unsigned int *index;
-  
+
   void initpointers(Complex **&U, Complex *u) {
     unsigned int C=max(A,B);
     U=new Complex *[C];
-    for(unsigned int a=0; a < C; ++a) 
+    for(unsigned int a=0; a < C; ++a)
       U[a]=u+a*m;
     pointers=true;
   }
-  
+
   void deletepointers(Complex **&U) {
     delete [] U;
   }
@@ -145,17 +145,17 @@ class ImplicitConvolution : public ThreadBase {
     indexsize=n;
     index=i;
   }
-  
+
   void init() {
     indexsize=0;
-    
+
     Complex* U0=U[0];
     Complex* U1=A == 1 ? utils::ComplexAlign(m) : U[1];
-    
+
     BackwardsO=new fft1d(m,1,U0,U1);
     ForwardsO=new fft1d(m,-1,U0,U1);
     threads=std::min(threads,max(BackwardsO->Threads(),ForwardsO->Threads()));
-    
+
     if(A == B) {
       Backwards=new fft1d(m,1,U0);
       threads=std::min(threads,Backwards->Threads());
@@ -164,12 +164,12 @@ class ImplicitConvolution : public ThreadBase {
       Forwards=new fft1d(m,-1,U0);
       threads=std::min(threads,Forwards->Threads());
     }
-    
+
     if(A == 1) utils::deleteAlign(U1);
 
     s=BuildZeta(2*m,m,ZetaH,ZetaL,threads);
   }
-  
+
   // m is the number of Complex data values.
   // U is an array of C distinct work arrays each of size m, where C=max(A,B)
   // A is the number of inputs.
@@ -181,7 +181,7 @@ class ImplicitConvolution : public ThreadBase {
       allocated(false) {
     init();
   }
-  
+
   // m is the number of Complex data values.
   // u is a work array of C*m Complex values.
   // A is the number of inputs.
@@ -193,7 +193,7 @@ class ImplicitConvolution : public ThreadBase {
     initpointers(U,u);
     init();
   }
-  
+
   // m is the number of Complex data values.
   // A is the number of inputs.
   // B is the number of outputs.
@@ -205,28 +205,28 @@ class ImplicitConvolution : public ThreadBase {
     initpointers(U,u);
     init();
   }
- 
+
   ~ImplicitConvolution() {
     utils::deleteAlign(ZetaH);
     utils::deleteAlign(ZetaL);
-    
+
     if(pointers) deletepointers(U);
     if(allocated) utils::deleteAlign(u);
-    
+
     if(A == B)
       delete Backwards;
     if(A <= B)
       delete Forwards;
-    
+
     delete ForwardsO;
-    delete BackwardsO;    
+    delete BackwardsO;
   }
 
   // F is an array of A pointers to distinct data blocks each of size m,
   // shifted by offset (contents not preserved).
   void convolve(Complex **F, multiplier *pmult, unsigned int i=0,
                 unsigned int offset=0);
-  
+
   void autoconvolve(Complex *f) {
     Complex *F[]={f};
     convolve(F,multautoconvolution);
@@ -248,13 +248,13 @@ class ImplicitConvolution : public ThreadBase {
     Complex *F[]={f,g};
     convolve(F,multcorrelation);
   }
-    
+
   template<class T>
   inline void pretransform(Complex **F, unsigned int k, Vec& Zetak);
 
   template<class T>
   void pretransform(Complex **F);
-  
+
   void posttransform(Complex *f, Complex *u);
 };
 
@@ -284,25 +284,25 @@ class ImplicitHConvolution : public ThreadBase {
     unsigned int C=max(A,B);
     U=new Complex *[C];
     unsigned stride=c+1;
-    for(unsigned int a=0; a < C; ++a) 
+    for(unsigned int a=0; a < C; ++a)
       U[a]=u+a*stride;
     pointers=true;
   }
-  
+
   void deletepointers(Complex **&U) {
     delete [] U;
   }
-  
+
   void allocateindex(unsigned int n, unsigned int *i) {
     indexsize=n;
     index=i;
   }
-  
+
   void init() {
     even=m == 2*c;
     indexsize=0;
     Complex* U0=U[0];
-    
+
     rc=new rcfft1d(m,U0);
     cr=new crfft1d(m,U0);
 
@@ -310,7 +310,7 @@ class ImplicitHConvolution : public ThreadBase {
     rco=new rcfft1d(m,(double *) U0,U1);
     cro=new crfft1d(m,U1,(double *) U0);
     if(A == 1) utils::deleteAlign(U1);
-    
+
     if(A != B) {
       rcO=rco;
       crO=cro;
@@ -318,12 +318,12 @@ class ImplicitHConvolution : public ThreadBase {
       rcO=rc;
       crO=cr;
     }
-    
+
     threads=std::min(threads,std::max(rco->Threads(),cro->Threads()));
     s=BuildZeta(3*m,c+2,ZetaH,ZetaL,threads);
     w=even ? utils::ComplexAlign(max(A,B)) : u;
   }
-  
+
   // m is the number of independent data values
   // U is an array of max(A,B) distinct work arrays of size c+1, where c=m/2
   // A is the number of inputs.
@@ -359,7 +359,7 @@ class ImplicitHConvolution : public ThreadBase {
   ImplicitHConvolution(unsigned int m, bool compact, Complex *u,
                        unsigned int A=2, unsigned int B=1,
                        unsigned int threads=fftw::maxthreads)
-    : ThreadBase(threads), m(m), c(m/2), compact(compact), A(A), B(B), u(u), 
+    : ThreadBase(threads), m(m), c(m/2), compact(compact), A(A), B(B), u(u),
       allocated(false) {
     initpointers(U,u);
     init();
@@ -381,7 +381,7 @@ class ImplicitHConvolution : public ThreadBase {
     if(even) utils::deleteAlign(w);
     utils::deleteAlign(ZetaH);
     utils::deleteAlign(ZetaL);
-    
+
     if(pointers) deletepointers(U);
     if(allocated) utils::deleteAlign(u);
 
@@ -389,14 +389,14 @@ class ImplicitHConvolution : public ThreadBase {
       delete cro;
       delete rco;
     }
-    
+
     delete cr;
     delete rc;
   }
-  
+
   // F is an array of A pointers to distinct data blocks each of size m,
   // shifted by offset (contents not preserved).
-  void convolve(Complex **F, realmultiplier *pmult, unsigned int i=0,         
+  void convolve(Complex **F, realmultiplier *pmult, unsigned int i=0,
                 unsigned int offset=0);
 
   void pretransform(Complex *F, Complex *f1c, Complex *U);
@@ -408,7 +408,7 @@ class ImplicitHConvolution : public ThreadBase {
     convolve(F,multbinary);
   }
 };
-  
+
 
 // Compute the scrambled implicitly m-padded complex Fourier transform of M
 // complex vectors, each of length m.
@@ -430,36 +430,36 @@ class fftpad {
   unsigned int s;
   Complex *ZetaH, *ZetaL;
   unsigned int threads;
-public:  
-  mfft1d *Backwards; 
+public:
+  mfft1d *Backwards;
   mfft1d *Forwards;
-  
+
   fftpad(unsigned int m, unsigned int M,
          unsigned int stride, Complex *u=NULL,
          unsigned int Threads=fftw::maxthreads)
     : m(m), M(M), stride(stride), threads(Threads) {
     Backwards=new mfft1d(m,1,M,stride,1,u,NULL,threads);
     Forwards=new mfft1d(m,-1,M,stride,1,u,NULL,threads);
-    
+
     threads=std::max(Backwards->Threads(),Forwards->Threads());
-    
+
     s=BuildZeta(2*m,m,ZetaH,ZetaL,threads);
   }
-  
+
   ~fftpad() {
     utils::deleteAlign(ZetaL);
     utils::deleteAlign(ZetaH);
     delete Forwards;
     delete Backwards;
   }
-  
+
   void expand(Complex *f, Complex *u);
   void reduce(Complex *f, Complex *u);
-  
+
   void backwards(Complex *f, Complex *u);
   void forwards(Complex *f, Complex *u);
 };
-  
+
 // Compute the scrambled implicitly m-padded complex Fourier transform of M
 // complex vectors, each of length 2m-1 with the origin at index m-1,
 // containing physical data for wavenumbers -m+1 to m-1.
@@ -474,33 +474,33 @@ class fftpad {
 //   stride is the spacing between the elements of each Complex vector.
 //
 class fft0pad {
-protected:  
+protected:
   unsigned int m;
   unsigned int M;
   unsigned int s;
   unsigned int stride;
   Complex *ZetaH, *ZetaL;
   unsigned int threads;
-public:  
+public:
   mfft1d *Forwards;
   mfft1d *Backwards;
-  
+
   fft0pad(unsigned int m, unsigned int M, unsigned int stride, Complex *u=NULL,
           unsigned int Threads=fftw::maxthreads)
     : m(m), M(M), stride(stride), threads(Threads) {
     Backwards=new mfft1d(m,1,M,stride,1,u,NULL,threads);
     Forwards=new mfft1d(m,-1,M,stride,1,u,NULL,threads);
-    
+
     s=BuildZeta(3*m,m,ZetaH,ZetaL);
   }
-  
+
   virtual ~fft0pad() {
     utils::deleteAlign(ZetaL);
     utils::deleteAlign(ZetaH);
     delete Forwards;
     delete Backwards;
   }
-  
+
   // Unscramble indices, returning spatial index stored at position i
   inline static unsigned findex(unsigned i, unsigned int m) {
     return i < m-1 ? 3*i : 3*i+4-3*m; // for i >= m-1: j=3*(i-(m-1))+1
@@ -512,15 +512,15 @@ class fft0pad {
 
   virtual void expand(Complex *f, Complex *u);
   virtual void reduce(Complex *f, Complex *u);
-  
+
   void backwards(Complex *f, Complex *u);
   virtual void forwards(Complex *f, Complex *u);
-  
+
   virtual void Backwards1(Complex *f, Complex *u);
   virtual void Forwards0(Complex *f);
   virtual void Forwards1(Complex *f, Complex *u);
 };
-  
+
 // Compute the scrambled implicitly m-padded complex Fourier transform of M
 // complex vectors, each of length 2m with the origin at index m,
 // corresponding to wavenumbers -m to m-1.
@@ -535,30 +535,30 @@ class fft0pad {
 //   stride is the spacing between the elements of each Complex vector.
 //
 class fft1pad : public fft0pad {
-public:  
+public:
   fft1pad(unsigned int m, unsigned int M, unsigned int stride,
-              Complex *u=NULL, unsigned int threads=fftw::maxthreads) :
+          Complex *u=NULL, unsigned int threads=fftw::maxthreads) :
     fft0pad(m,M,stride,u,threads) {}
 
   // Unscramble indices, returning spatial index stored at position i
   inline static unsigned findex(unsigned i, unsigned int m) {
     return i < m ? 3*i : 3*(i-m)+1;
   }
-  
+
   inline static unsigned uindex(unsigned i, unsigned int m) {
     return i > 0 ? 3*i-1 : 3*m-1;
   }
-  
+
   void expand(Complex *f, Complex *u);
   void reduce(Complex *f, Complex *u);
-  
+
   void forwards(Complex *f, Complex *u);
-  
+
   void Backwards1(Complex *f, Complex *u);
   void Forwards0(Complex *f);
   void Forwards1(Complex *f, Complex *u);
 };
-  
+
 // In-place implicitly dealiased 2D complex convolution.
 class ImplicitConvolution2 : public ThreadBase {
 protected:
@@ -572,28 +572,28 @@ class ImplicitConvolution2 : public ThreadBase {
   bool allocated;
   unsigned int indexsize;
   bool toplevel;
-public:  
+public:
   unsigned int *index;
 
   void initpointers2(Complex **&U2, Complex *u2, unsigned int stride) {
     U2=new Complex *[A];
     for(unsigned int a=0; a < A; ++a)
       U2[a]=u2+a*stride;
-    
+
     if(toplevel) allocateindex(1,new unsigned int[1]);
   }
-  
+
   void deletepointers2(Complex **&U2) {
     if(toplevel) {
       delete [] index;
-      
+
       for(unsigned int t=1; t < threads; ++t)
         delete [] yconvolve[t]->index;
     }
 
     delete [] U2;
   }
-  
+
   void allocateindex(unsigned int n, unsigned int *i) {
     indexsize=n;
     index=i;
@@ -601,7 +601,7 @@ class ImplicitConvolution2 : public ThreadBase {
     for(unsigned int t=1; t < threads; ++t)
       yconvolve[t]->allocateindex(n,new unsigned int[n]);
   }
-  
+
   void init(const convolveOptions& options) {
     toplevel=options.toplevel;
     xfftpad=new fftpad(mx,options.ny,options.ny,u2,threads);
@@ -611,7 +611,7 @@ class ImplicitConvolution2 : public ThreadBase {
       yconvolve[t]=new ImplicitConvolution(my,u1+t*my*C,A,B,innerthreads);
     initpointers2(U2,u2,options.stride2);
   }
-  
+
   void set(convolveOptions& options) {
     if(options.nx == 0) options.nx=mx;
     if(options.ny == 0) {
@@ -619,7 +619,7 @@ class ImplicitConvolution2 : public ThreadBase {
       options.stride2=mx*my;
     }
   }
-  
+
   // u1 is a temporary array of size my*C*threads.
   // u2 is a temporary array of size mx*my*C.
   // A is the number of inputs.
@@ -636,7 +636,7 @@ class ImplicitConvolution2 : public ThreadBase {
     multithread(options.nx);
     init(options);
   }
-  
+
   ImplicitConvolution2(unsigned int mx, unsigned int my,
                        unsigned int A=2, unsigned int B=1,
                        unsigned int threads=fftw::maxthreads,
@@ -649,34 +649,34 @@ class ImplicitConvolution2 : public ThreadBase {
     u2=utils::ComplexAlign(options.stride2*C);
     init(options);
   }
-  
+
   virtual ~ImplicitConvolution2() {
     deletepointers2(U2);
-    
+
     for(unsigned int t=0; t < threads; ++t)
       delete yconvolve[t];
     delete [] yconvolve;
-    
+
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(u2);
       utils::deleteAlign(u1);
     }
   }
-  
+
   void backwards(Complex **F, Complex **U2, unsigned int offset) {
     for(unsigned int a=0; a < A; ++a)
       xfftpad->backwards(F[a]+offset,U2[a]);
   }
 
-  void subconvolution(Complex **F, multiplier *pmult, 
+  void subconvolution(Complex **F, multiplier *pmult,
                       unsigned int r, unsigned int M, unsigned int stride,
                       unsigned int offset=0) {
     if(threads > 1) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
       for(unsigned int i=0; i < M; ++i)
         yconvolve[get_thread_num()]->convolve(F,pmult,2*i+r,offset+i*stride);
     } else {
@@ -685,12 +685,12 @@ class ImplicitConvolution2 : public ThreadBase {
         yconvolve0->convolve(F,pmult,2*i+r,offset+i*stride);
     }
   }
-  
+
   void forwards(Complex **F, Complex **U2, unsigned int offset) {
     for(unsigned int b=0; b < B; ++b)
       xfftpad->forwards(F[b]+offset,U2[b]);
   }
-  
+
   // F is a pointer to A distinct data blocks each of size mx*my,
   // shifted by offset (contents not preserved).
   virtual void convolve(Complex **F, multiplier *pmult, unsigned int i=0,
@@ -710,7 +710,7 @@ class ImplicitConvolution2 : public ThreadBase {
     subconvolution(U2,pmult,1,mx,my);
     forwards(F,U2,offset);
   }
-  
+
   // Binary convolution:
   void convolve(Complex *f, Complex *g) {
     Complex *F[]={f,g};
@@ -755,12 +755,12 @@ inline void HermitianSymmetrizeXY(unsigned int mx, unsigned int my,
   int mxstride=mx*stride;
   unsigned int myz=my*mz;
   unsigned int origin=xorigin*stride+yorigin*mz;
-  
+
   f[origin].im=0.0;
 
   for(int i=stride; i < mxstride; i += stride)
     f[origin-i]=conj(f[origin+i]);
-  
+
   PARALLEL(
     for(int i=stride-mxstride; i < mxstride; i += stride) {
       int stop=i+myz;
@@ -789,27 +789,27 @@ class ImplicitHConvolution2 : public ThreadBase {
 public:
   unsigned int *index;
 
-  void initpointers2(Complex **&U2, Complex *u2, unsigned int stride) 
+  void initpointers2(Complex **&U2, Complex *u2, unsigned int stride)
   {
     unsigned int C=max(A,B);
     U2=new Complex *[C];
     for(unsigned int a=0; a < C; ++a)
       U2[a]=u2+a*stride;
-    
+
     if(toplevel) allocateindex(1,new unsigned int[1]);
   }
-  
+
   void deletepointers2(Complex **&U2) {
     if(toplevel) {
       delete [] index;
-      
+
       for(unsigned int t=1; t < threads; ++t)
         delete [] yconvolve[t]->index;
     }
 
     delete [] U2;
   }
-  
+
   void allocateindex(unsigned int n, unsigned int *i) {
     indexsize=n;
     index=i;
@@ -817,13 +817,13 @@ class ImplicitHConvolution2 : public ThreadBase {
     for(unsigned int t=1; t < threads; ++t)
       yconvolve[t]->allocateindex(n,new unsigned int[n]);
   }
-  
+
   void init(const convolveOptions& options) {
     unsigned int C=max(A,B);
     toplevel=options.toplevel;
     xfftpad=xcompact ? new fft0pad(mx,options.ny,options.ny,u2) :
       new fft1pad(mx,options.ny,options.ny,u2);
-    
+
     yconvolve=new ImplicitHConvolution*[threads];
     for(unsigned int t=0; t < threads; ++t)
       yconvolve[t]=new ImplicitHConvolution(my,ycompact,u1+t*(my/2+1)*C,A,B,
@@ -838,7 +838,7 @@ class ImplicitHConvolution2 : public ThreadBase {
       options.stride2=(mx+xcompact)*options.ny;
     }
   }
-  
+
   // u1 is a temporary array of size (my/2+1)*C*threads.
   // u2 is a temporary array of size (mx+xcompact)*(my+!ycompact)*C;
   // A is the number of inputs.
@@ -855,21 +855,21 @@ class ImplicitHConvolution2 : public ThreadBase {
     multithread(options.nx);
     init(options);
   }
-  
+
   ImplicitHConvolution2(unsigned int mx, unsigned int my,
                         bool xcompact, bool ycompact,
                         Complex *u1, Complex *u2,
                         unsigned int A=2, unsigned int B=1,
                         unsigned int threads=fftw::maxthreads,
                         convolveOptions options=defaultconvolveOptions) :
-    ThreadBase(threads), mx(mx), my(my), 
+    ThreadBase(threads), mx(mx), my(my),
     xcompact(xcompact), ycompact(ycompact), u1(u1), u2(u2), A(A), B(B),
     allocated(false) {
     set(options);
     multithread(options.nx);
     init(options);
   }
-  
+
   ImplicitHConvolution2(unsigned int mx, unsigned int my,
                         bool xcompact=true, bool ycompact=true,
                         unsigned int A=2, unsigned int B=1,
@@ -884,16 +884,16 @@ class ImplicitHConvolution2 : public ThreadBase {
     u2=utils::ComplexAlign(options.stride2*C);
     init(options);
   }
-  
+
   virtual ~ImplicitHConvolution2() {
     deletepointers2(U2);
-    
+
     for(unsigned int t=0; t < threads; ++t)
       delete yconvolve[t];
     delete [] yconvolve;
-    
+
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(u2);
       utils::deleteAlign(u1);
@@ -917,7 +917,7 @@ class ImplicitHConvolution2 : public ThreadBase {
     if(threads > 1) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
       for(unsigned int i=0; i < M; ++i)
         yconvolve[get_thread_num()]->convolve(F,pmult,indexfunction(i,mx),
                                               offset+i*stride);
@@ -926,14 +926,14 @@ class ImplicitHConvolution2 : public ThreadBase {
       for(unsigned int i=0; i < M; ++i)
         yconvolve0->convolve(F,pmult,indexfunction(i,mx),offset+i*stride);
     }
-  }  
-  
+  }
+
   void forwards(Complex **F, Complex **U2, unsigned int offset) {
     for(unsigned int b=0; b < B; ++b)
       xfftpad->forwards(F[b]+offset,U2[b]);
   }
-  
-  // F is a pointer to A distinct data blocks each of size 
+
+  // F is a pointer to A distinct data blocks each of size
   // (2mx-xcompact)*(my+!ycompact), shifted by offset (contents not preserved).
   virtual void convolve(Complex **F, realmultiplier *pmult,
                         bool symmetrize=true, unsigned int i=0,
@@ -954,14 +954,14 @@ class ImplicitHConvolution2 : public ThreadBase {
     subconvolution(U2,pmult,xfftpad->uindex,mx+xcompact,stride);
     forwards(F,U2,offset);
   }
-  
+
   // Binary convolution:
   void convolve(Complex *f, Complex *g, bool symmetrize=true) {
     Complex *F[]={f,g};
     convolve(F,multbinary,symmetrize);
   }
 };
-  
+
 // In-place implicitly dealiased 3D complex convolution.
 class ImplicitConvolution3 : public ThreadBase {
 protected:
@@ -976,7 +976,7 @@ class ImplicitConvolution3 : public ThreadBase {
   bool allocated;
   unsigned int indexsize;
   bool toplevel;
-public:  
+public:
   unsigned int *index;
 
   void initpointers3(Complex **&U3, Complex *u3, unsigned int stride) {
@@ -984,21 +984,21 @@ class ImplicitConvolution3 : public ThreadBase {
     U3=new Complex *[C];
     for(unsigned int a=0; a < C; ++a)
       U3[a]=u3+a*stride;
-    
+
     if(toplevel) allocateindex(2,new unsigned int[2]);
   }
-  
+
   void deletepointers3(Complex **&U3) {
     if(toplevel) {
       delete [] index;
-      
+
       for(unsigned int t=1; t < threads; ++t)
         delete [] yzconvolve[t]->index;
     }
-      
+
     delete [] U3;
   }
-  
+
   void allocateindex(unsigned int n, unsigned int *i) {
     indexsize=n;
     index=i;
@@ -1006,12 +1006,12 @@ class ImplicitConvolution3 : public ThreadBase {
     for(unsigned int t=1; t < threads; ++t)
       yzconvolve[t]->allocateindex(n,new unsigned int[n]);
   }
-  
+
   void init(const convolveOptions& options) {
     toplevel=options.toplevel;
     unsigned int nyz=options.ny*options.nz;
     xfftpad=new fftpad(mx,nyz,nyz,u3,threads);
-    
+
     if(options.nz == mz) {
       unsigned int C=max(A,B);
       yzconvolve=new ImplicitConvolution2*[threads];
@@ -1022,7 +1022,7 @@ class ImplicitConvolution3 : public ThreadBase {
       initpointers3(U3,u3,options.stride3);
     } else yzconvolve=NULL;
   }
-  
+
   void set(convolveOptions &options)
   {
     if(options.ny == 0) {
@@ -1032,7 +1032,7 @@ class ImplicitConvolution3 : public ThreadBase {
       options.stride3=mx*my*mz;
     }
   }
-  
+
   // u1 is a temporary array of size mz*C*threads.
   // u2 is a temporary array of size my*mz*C*threads.
   // u3 is a temporary array of size mx*my*mz*C.
@@ -1041,7 +1041,7 @@ class ImplicitConvolution3 : public ThreadBase {
   // Here C=max(A,B).
   ImplicitConvolution3(unsigned int mx, unsigned int my, unsigned int mz,
                        Complex *u1, Complex *u2, Complex *u3,
-                       unsigned int A=2, unsigned int B=1, 
+                       unsigned int A=2, unsigned int B=1,
                        unsigned int threads=fftw::maxthreads,
                        convolveOptions options=defaultconvolveOptions) :
     ThreadBase(threads), mx(mx), my(my), mz(mz),
@@ -1052,7 +1052,7 @@ class ImplicitConvolution3 : public ThreadBase {
   }
 
   ImplicitConvolution3(unsigned int mx, unsigned int my, unsigned int mz,
-                       unsigned int A=2, unsigned int B=1, 
+                       unsigned int A=2, unsigned int B=1,
                        unsigned int threads=fftw::maxthreads,
                        convolveOptions options=defaultconvolveOptions) :
     ThreadBase(threads), mx(mx), my(my), mz(mz), A(A), B(B),
@@ -1065,7 +1065,7 @@ class ImplicitConvolution3 : public ThreadBase {
     u3=utils::ComplexAlign(options.stride3*C);
     init(options);
   }
-  
+
   virtual ~ImplicitConvolution3() {
     if(yzconvolve) {
       deletepointers3(U3);
@@ -1074,28 +1074,28 @@ class ImplicitConvolution3 : public ThreadBase {
         delete yzconvolve[t];
       delete [] yzconvolve;
     }
-    
+
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(u3);
       utils::deleteAlign(u2);
       utils::deleteAlign(u1);
     }
   }
-  
+
   void backwards(Complex **F, Complex **U3, unsigned int offset) {
     for(unsigned int a=0; a < A; ++a)
       xfftpad->backwards(F[a]+offset,U3[a]);
   }
 
-  void subconvolution(Complex **F, multiplier *pmult, 
+  void subconvolution(Complex **F, multiplier *pmult,
                       unsigned int r, unsigned int M, unsigned int stride,
                       unsigned int offset=0) {
     if(threads > 1) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
       for(unsigned int i=0; i < M; ++i)
         yzconvolve[get_thread_num()]->convolve(F,pmult,2*i+r,offset+i*stride);
     } else {
@@ -1105,12 +1105,12 @@ class ImplicitConvolution3 : public ThreadBase {
       }
     }
   }
-  
+
   void forwards(Complex **F, Complex **U3, unsigned int offset=0) {
     for(unsigned int b=0; b < B; ++b)
       xfftpad->forwards(F[b]+offset,U3[b]);
   }
-  
+
   // F is a pointer to A distinct data blocks each of size mx*my*mz,
   // shifted by offset
   virtual void convolve(Complex **F, multiplier *pmult, unsigned int i=0,
@@ -1132,7 +1132,7 @@ class ImplicitConvolution3 : public ThreadBase {
     subconvolution(U3,pmult,1,mx,stride);
     forwards(F,U3,offset);
   }
-  
+
   // Binary convolution:
   void convolve(Complex *f, Complex *g) {
     Complex *F[]={f,g};
@@ -1171,29 +1171,29 @@ class ImplicitHConvolution3 : public ThreadBase {
   bool allocated;
   unsigned int indexsize;
   bool toplevel;
-public:     
+public:
   unsigned int *index;
-  
+
   void initpointers3(Complex **&U3, Complex *u3, unsigned int stride) {
     unsigned int C=max(A,B);
     U3=new Complex *[C];
     for(unsigned int a=0; a < C; ++a)
       U3[a]=u3+a*stride;
-    
+
     if(toplevel) allocateindex(2,new unsigned int[2]);
   }
-  
+
   void deletepointers3(Complex **&U3) {
     if(toplevel) {
       delete [] index;
-      
+
       for(unsigned int t=1; t < threads; ++t)
         delete [] yzconvolve[t]->index;
     }
-      
+
     delete [] U3;
   }
-    
+
   void allocateindex(unsigned int n, unsigned int *i) {
     indexsize=n;
     index=i;
@@ -1201,14 +1201,14 @@ class ImplicitHConvolution3 : public ThreadBase {
     for(unsigned int t=1; t < threads; ++t)
       yzconvolve[t]->allocateindex(n,new unsigned int[n]);
   }
-  
+
   void init(const convolveOptions& options) {
     toplevel=options.toplevel;
     unsigned int nyz=options.ny*options.nz;
     xfftpad=xcompact ? new fft0pad(mx,nyz,nyz,u3) :
       new fft1pad(mx,nyz,nyz,u3);
 
-      if(options.nz == mz+!zcompact) {
+    if(options.nz == mz+!zcompact) {
       unsigned int C=max(A,B);
       yzconvolve=new ImplicitHConvolution2*[threads];
       for(unsigned int t=0; t < threads; ++t)
@@ -1220,7 +1220,7 @@ class ImplicitHConvolution3 : public ThreadBase {
       initpointers3(U3,u3,options.stride3);
     } else yzconvolve=NULL;
   }
-  
+
   void set(convolveOptions& options) {
     if(options.ny == 0) {
       options.ny=2*my-ycompact;
@@ -1229,10 +1229,10 @@ class ImplicitHConvolution3 : public ThreadBase {
       options.stride3=(mx+xcompact)*options.ny*options.nz;
     }
   }
-  
+
   // u1 is a temporary array of size (mz/2+1)*C*threads.
   // u2 is a temporary array of size (my+ycompact)*(mz+!zcompact)*C*threads.
-  // u3 is a temporary array of size 
+  // u3 is a temporary array of size
   //                             (mx+xcompact)*(2my-ycompact)*(mz+!zcompact)*C.
   // A is the number of inputs.
   // B is the number of outputs.
@@ -1250,7 +1250,7 @@ class ImplicitHConvolution3 : public ThreadBase {
     multithread(mx);
     init(options);
   }
-  
+
   ImplicitHConvolution3(unsigned int mx, unsigned int my, unsigned int mz,
                         bool xcompact, bool ycompact, bool zcompact,
                         Complex *u1, Complex *u2, Complex *u3,
@@ -1258,13 +1258,13 @@ class ImplicitHConvolution3 : public ThreadBase {
                         unsigned int threads=fftw::maxthreads,
                         convolveOptions options=defaultconvolveOptions) :
     ThreadBase(threads), mx(mx), my(my), mz(mz),
-    xcompact(xcompact), ycompact(ycompact), zcompact(zcompact), 
+    xcompact(xcompact), ycompact(ycompact), zcompact(zcompact),
     u1(u1), u2(u2), u3(u3), A(A), B(B), allocated(false) {
     set(options);
     multithread(mx);
     init(options);
   }
-  
+
   ImplicitHConvolution3(unsigned int mx, unsigned int my, unsigned int mz,
                         bool xcompact=true, bool ycompact=true,
                         bool zcompact=true,
@@ -1282,31 +1282,31 @@ class ImplicitHConvolution3 : public ThreadBase {
     u3=utils::ComplexAlign(options.stride3*C);
     init(options);
   }
-  
+
   virtual ~ImplicitHConvolution3() {
     if(yzconvolve) {
       deletepointers3(U3);
-      
+
       for(unsigned int t=0; t < threads; ++t)
         delete yzconvolve[t];
       delete [] yzconvolve;
     }
 
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(u3);
       utils::deleteAlign(u2);
       utils::deleteAlign(u1);
     }
   }
-  
+
   virtual void HermitianSymmetrize(Complex *f, Complex *u)
-  {      
+  {
     HermitianSymmetrizeXY(mx,my,mz+!zcompact,mx-xcompact,my-ycompact,f,
                           threads);
   }
-  
+
   void backwards(Complex **F, Complex **U3, bool symmetrize,
                  unsigned int offset) {
     for(unsigned int a=0; a < A; ++a) {
@@ -1325,7 +1325,7 @@ class ImplicitHConvolution3 : public ThreadBase {
     if(threads > 1) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
       for(unsigned int i=0; i < M; ++i)
         yzconvolve[get_thread_num()]->convolve(F,pmult,false,
                                                indexfunction(i,mx),
@@ -1342,9 +1342,9 @@ class ImplicitHConvolution3 : public ThreadBase {
     for(unsigned int b=0; b < B; ++b)
       xfftpad->forwards(F[b]+offset,U3[b]);
   }
-  
+
   // F is a pointer to A distinct data blocks each of size
-  // (2mx-compact)*(2my-ycompact)*(mz+!zcompact), shifted by offset 
+  // (2mx-compact)*(2my-ycompact)*(mz+!zcompact), shifted by offset
   // (contents not preserved).
   virtual void convolve(Complex **F, realmultiplier *pmult,
                         bool symmetrize=true, unsigned int i=0,
@@ -1358,14 +1358,14 @@ class ImplicitHConvolution3 : public ThreadBase {
             Index[i]=index[i];
         }
       }
-    }    
+    }
     unsigned int stride=(2*my-ycompact)*(mz+!zcompact);
     backwards(F,U3,symmetrize,offset);
     subconvolution(F,pmult,xfftpad->findex,2*mx-xcompact,stride,offset);
     subconvolution(U3,pmult,xfftpad->uindex,mx+xcompact,stride);
     forwards(F,U3,offset);
   }
-    
+
   // Binary convolution:
   void convolve(Complex *f, Complex *g, bool symmetrize=true) {
     Complex *F[]={f,g};
@@ -1387,51 +1387,51 @@ class ImplicitHTConvolution : public ThreadBase {
   bool allocated;
   unsigned int twom;
   unsigned int stride;
-public:  
+public:
   void initpointers(Complex **&W, Complex *w) {
     W=new Complex *[M];
     unsigned int m1=m+1;
-    for(unsigned int s=0; s < M; ++s) 
+    for(unsigned int s=0; s < M; ++s)
       W[s]=w+s*m1;
   }
-  
+
   void deletepointers(Complex **&W) {
     delete [] W;
   }
-  
+
   void init() {
     twom=2*m;
     stride=twom+2;
-    
+
     rc=new rcfft1d(twom,u);
     cr=new crfft1d(twom,u);
-    
+
     rco=new rcfft1d(twom,(double *) u,v);
     cro=new crfft1d(twom,v,(double *) u);
-    
+
     threads=std::min(threads,std::max(rco->Threads(),cro->Threads()));
-    
+
     s=BuildZeta(4*m,m,ZetaH,ZetaL,threads);
-    
+
     initpointers(W,w);
   }
-  
+
   // u, v, and w are distinct temporary arrays each of size (m+1)*M.
   ImplicitHTConvolution(unsigned int m, Complex *u, Complex *v,
                         Complex *w, unsigned int M=1) :
     m(m), u(u), v(v), w(w), M(M), allocated(false) {
     init();
   }
-  
-  ImplicitHTConvolution(unsigned int m, unsigned int M=1) : 
+
+  ImplicitHTConvolution(unsigned int m, unsigned int M=1) :
     m(m), u(utils::ComplexAlign(m*M+M)), v(utils::ComplexAlign(m*M+M)),
     w(utils::ComplexAlign(m*M+M)), M(M), allocated(true) {
     init();
   }
-  
+
   ~ImplicitHTConvolution() {
     deletepointers(W);
-    
+
     if(allocated) {
       utils::deleteAlign(w);
       utils::deleteAlign(v);
@@ -1444,20 +1444,20 @@ class ImplicitHTConvolution : public ThreadBase {
     delete cr;
     delete rc;
   }
-  
+
   void mult(double *a, double *b, double **C, unsigned int offset=0);
-  
-  void convolve(Complex **F, Complex **G, Complex **H, 
+
+  void convolve(Complex **F, Complex **G, Complex **H,
                 Complex *u, Complex *v, Complex **W,
                 unsigned int offset=0);
-  
+
   // F, G, and H are distinct pointers to M distinct data blocks each of size
   // m+1, shifted by offset (contents not preserved).
   // The output is returned in F[0].
   void convolve(Complex **F, Complex **G, Complex **H, unsigned int offset=0) {
     convolve(F,G,H,u,v,W,offset);
   }
-  
+
   // Constructor for special case M=1:
   void convolve(Complex *f, Complex *g, Complex *h) {
     convolve(&f,&g,&h);
@@ -1477,34 +1477,34 @@ class ImplicitHFGGConvolution : public ThreadBase {
   bool allocated;
   unsigned int twom;
   unsigned int stride;
-public:  
+public:
   void init() {
     twom=2*m;
     stride=twom+2;
-    
+
     rc=new rcfft1d(twom,u);
     cr=new crfft1d(twom,u);
-    
+
     rco=new rcfft1d(twom,(double *) u,v);
     cro=new crfft1d(twom,v,(double *) u);
-    
+
     threads=std::min(threads,std::max(rco->Threads(),cro->Threads()));
-    
+
     s=BuildZeta(4*m,m,ZetaH,ZetaL,threads);
   }
-  
+
   // u and v are distinct temporary arrays each of size m+1.
   ImplicitHFGGConvolution(unsigned int m, Complex *u, Complex *v) :
     m(m), u(u), v(v), allocated(false) {
     init();
   }
-  
-  ImplicitHFGGConvolution(unsigned int m) : 
+
+  ImplicitHFGGConvolution(unsigned int m) :
     m(m), u(utils::ComplexAlign(m+1)), v(utils::ComplexAlign(m+1)),
     allocated(true) {
     init();
   }
-  
+
   ~ImplicitHFGGConvolution() {
     if(allocated) {
       utils::deleteAlign(v);
@@ -1517,11 +1517,11 @@ class ImplicitHFGGConvolution : public ThreadBase {
     delete cr;
     delete rc;
   }
-  
+
   void mult(double *a, double *b);
-  
+
   void convolve(Complex *f, Complex *g, Complex *u, Complex *v);
-  
+
   // f and g are distinct pointers to data of size m+1 (contents not
   // preserved). The output is returned in f.
   void convolve(Complex *f, Complex *g) {
@@ -1544,40 +1544,40 @@ class ImplicitHFFFConvolution : public ThreadBase {
   unsigned int stride;
 public:
   void mult(double *a);
-  
+
   void init() {
     twom=2*m;
     stride=twom+2;
-    
+
     rc=new rcfft1d(twom,u);
     cr=new crfft1d(twom,u);
-    
+
     threads=std::min(threads,std::max(rc->Threads(),cr->Threads()));
-    
+
     s=BuildZeta(4*m,m,ZetaH,ZetaL,threads);
   }
-  
+
   // u is a distinct temporary array of size m+1.
   ImplicitHFFFConvolution(unsigned int m, Complex *u) :
     m(m), u(u), allocated(false) {
     init();
   }
-  
+
   ImplicitHFFFConvolution(unsigned int m) :
     m(m), u(utils::ComplexAlign(m+1)), allocated(true) {
     init();
   }
-  
+
   ~ImplicitHFFFConvolution() {
     if(allocated)
       utils::deleteAlign(u);
-    
+
     utils::deleteAlign(ZetaL);
     utils::deleteAlign(ZetaH);
     delete cr;
     delete rc;
   }
-  
+
   void convolve(Complex *f, Complex *u);
   // f is a pointer to data of size m+1 (contents not preserved).
   // The output is returned in f.
@@ -1585,7 +1585,7 @@ class ImplicitHFFFConvolution : public ThreadBase {
     convolve(f,u);
   }
 };
-  
+
 // Compute the scrambled implicitly 2m-padded complex Fourier transform of M
 // complex vectors, each of length 2m with the Fourier origin at index m.
 // The arrays in and out (which may coincide), along
@@ -1607,27 +1607,27 @@ class fft0bipad {
   mfft1d *Forwards;
   Complex *ZetaH, *ZetaL;
   unsigned int threads;
-public:  
+public:
   fft0bipad(unsigned int m, unsigned int M, unsigned int stride,
-            Complex *f, unsigned int Threads=fftw::maxthreads) : 
+            Complex *f, unsigned int Threads=fftw::maxthreads) :
     m(m), M(M), stride(stride), threads(Threads) {
     unsigned int twom=2*m;
     Backwards=new mfft1d(twom,1,M,stride,1,f,NULL,threads);
     Forwards=new mfft1d(twom,-1,M,stride,1,f,NULL,threads);
-    
+
     threads=std::min(threads,
                      std::max(Backwards->Threads(),Forwards->Threads()));
-    
+
     s=BuildZeta(4*m,twom,ZetaH,ZetaL,threads);
   }
-  
+
   ~fft0bipad() {
     utils::deleteAlign(ZetaL);
     utils::deleteAlign(ZetaH);
     delete Forwards;
     delete Backwards;
   }
-  
+
   void backwards(Complex *f, Complex *u);
   void forwards(Complex *f, Complex *u);
 };
@@ -1645,7 +1645,7 @@ class ImplicitHTConvolution2 : public ThreadBase {
   bool allocated;
   Complex **u,**v;
   Complex ***W;
-public:  
+public:
   void initpointers(Complex **&u, Complex **&v, Complex ***&W,
                     unsigned int threads) {
     u=new Complex *[threads];
@@ -1660,7 +1660,7 @@ class ImplicitHTConvolution2 : public ThreadBase {
       yconvolve->initpointers(W[i],wi);
     }
   }
-    
+
   void deletepointers(Complex **&u, Complex **&v, Complex ***&W,
                       unsigned int threads) {
     for(unsigned int i=0; i < threads; ++i)
@@ -1669,7 +1669,7 @@ class ImplicitHTConvolution2 : public ThreadBase {
     delete [] v;
     delete [] u;
   }
-    
+
   void initpointers(Complex **&U2, Complex **&V2, Complex **&W2,
                     Complex *u2, Complex *v2, Complex *w2) {
     U2=new Complex *[M];
@@ -1683,29 +1683,29 @@ class ImplicitHTConvolution2 : public ThreadBase {
       W2[s]=w2+smu;
     }
   }
-  
+
   void deletepointers(Complex **&U2, Complex **&V2, Complex **&W2) {
     delete [] W2;
     delete [] V2;
     delete [] U2;
   }
-  
+
   void init() {
     xfftpad=new fft0bipad(mx,my,my+1,u2,threads);
-    
+
     yconvolve=new ImplicitHTConvolution(my,u1,v1,w1,M);
     yconvolve->Threads(1);
 
     initpointers(u,v,W,threads);
     initpointers(U2,V2,W2,u2,v2,w2);
   }
-  
+
   // u1, v1, and w1 are temporary arrays of size (my+1)*M*threads;
   // u2, v2, and w2 are temporary arrays of size 2mx*(my+1)*M.
   // M is the number of data blocks (each corresponding to a dot product term).
   // threads is the number of threads to use in the outer subconvolution loop.
   ImplicitHTConvolution2(unsigned int mx, unsigned int my,
-                         Complex *u1, Complex *v1, Complex *w1, 
+                         Complex *u1, Complex *v1, Complex *w1,
                          Complex *u2, Complex *v2, Complex *w2,
                          unsigned int M=1,
                          unsigned int threads=fftw::maxthreads) :
@@ -1713,7 +1713,7 @@ class ImplicitHTConvolution2 : public ThreadBase {
     u2(u2), v2(v2), w2(w2), M(M), allocated(false) {
     init();
   }
-  
+
   ImplicitHTConvolution2(unsigned int mx, unsigned int my,
                          unsigned int M=1,
                          unsigned int threads=fftw::maxthreads) :
@@ -1727,14 +1727,14 @@ class ImplicitHTConvolution2 : public ThreadBase {
     M(M), allocated(true) {
     init();
   }
-  
+
   ~ImplicitHTConvolution2() {
     deletepointers(U2,V2,W2);
     deletepointers(u,v,W,threads);
-    
+
     delete yconvolve;
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(w2);
       utils::deleteAlign(v2);
@@ -1744,32 +1744,32 @@ class ImplicitHTConvolution2 : public ThreadBase {
       utils::deleteAlign(u1);
     }
   }
-  
-  void convolve(Complex **F, Complex **G, Complex **H, 
-                Complex **u, Complex **v, Complex ***W, 
+
+  void convolve(Complex **F, Complex **G, Complex **H,
+                Complex **u, Complex **v, Complex ***W,
                 Complex **U2, Complex **V2, Complex **W2,
                 bool symmetrize=true, unsigned int offset=0) {
     Complex *u2=U2[0];
     Complex *v2=V2[0];
     Complex *w2=W2[0];
-    
+
     unsigned int my1=my+1;
     unsigned int mu=2*mx*my1;
-    
+
     for(unsigned int s=0; s < M; ++s) {
       Complex *f=F[s]+offset;
       if(symmetrize)
         HermitianSymmetrizeX(mx,my1,mx,f);
       xfftpad->backwards(f,u2+s*mu);
     }
-    
+
     for(unsigned int s=0; s < M; ++s) {
       Complex *g=G[s]+offset;
       if(symmetrize)
         HermitianSymmetrizeX(mx,my1,mx,g);
       xfftpad->backwards(g,v2+s*mu);
     }
-    
+
     for(unsigned int s=0; s < M; ++s) {
       Complex *h=H[s]+offset;
       if(symmetrize)
@@ -1779,15 +1779,15 @@ class ImplicitHTConvolution2 : public ThreadBase {
 
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1) {
       unsigned int thread=get_thread_num();
       yconvolve->convolve(F,G,H,u[thread],v[thread],W[thread],i+offset);
     }
-    
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1) {
       unsigned int thread=get_thread_num();
       yconvolve->convolve(U2,V2,W2,u[thread],v[thread],W[thread],i+offset);
@@ -1795,7 +1795,7 @@ class ImplicitHTConvolution2 : public ThreadBase {
 
     xfftpad->forwards(F[0]+offset,u2);
   }
-  
+
   // F, G, and H are distinct pointers to M distinct data blocks each of size
   // 2mx*(my+1), shifted by offset (contents not preserved).
   // The output is returned in F[0].
@@ -1821,7 +1821,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
   ImplicitHFGGConvolution *yconvolve;
   bool allocated;
   Complex **u,**v;
-public:  
+public:
   void initpointers(Complex **&u, Complex **&v, unsigned int threads) {
     u=new Complex *[threads];
     v=new Complex *[threads];
@@ -1832,21 +1832,21 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
       v[i]=v1+imy1;
     }
   }
-    
+
   void deletepointers(Complex **&u, Complex **&v) {
     delete [] v;
     delete [] u;
   }
-    
+
   void init() {
     xfftpad=new fft0bipad(mx,my,my+1,u2,threads);
-    
+
     yconvolve=new ImplicitHFGGConvolution(my,u1,v1);
     yconvolve->Threads(1);
 
     initpointers(u,v,threads);
   }
-  
+
   // u1 and v1 are temporary arrays of size (my+1)*threads.
   // u2 and v2 are temporary arrays of size 2mx*(my+1).
   // threads is the number of threads to use in the outer subconvolution loop.
@@ -1858,7 +1858,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
     allocated(false) {
     init();
   }
-  
+
   ImplicitHFGGConvolution2(unsigned int mx, unsigned int my,
                            unsigned int threads=fftw::maxthreads) :
     ThreadBase(threads), mx(mx), my(my),
@@ -1869,13 +1869,13 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
     allocated(true) {
     init();
   }
-  
+
   ~ImplicitHFGGConvolution2() {
     deletepointers(u,v);
-    
+
     delete yconvolve;
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(v2);
       utils::deleteAlign(u2);
@@ -1883,32 +1883,32 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
       utils::deleteAlign(u1);
     }
   }
-  
+
   void convolve(Complex *f, Complex *g,
                 Complex **u, Complex **v,
                 Complex *u2, Complex *v2, bool symmetrize=true) {
     unsigned int my1=my+1;
     unsigned int mu=2*mx*my1;
-    
+
     if(symmetrize)
       HermitianSymmetrizeX(mx,my1,mx,f);
     xfftpad->backwards(f,u2);
-    
+
     if(symmetrize)
       HermitianSymmetrizeX(mx,my1,mx,g);
     xfftpad->backwards(g,v2);
-    
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1) {
       unsigned int thread=get_thread_num();
       yconvolve->convolve(f+i,g+i,u[thread],v[thread]);
     }
-    
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1) {
       unsigned int thread=get_thread_num();
       yconvolve->convolve(u2+i,v2+i,u[thread],v[thread]);
@@ -1916,7 +1916,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase {
 
     xfftpad->forwards(f,u2);
   }
-  
+
   void convolve(Complex *f, Complex *g, bool symmetrize=true) {
     convolve(f,g,u,v,u2,v2,symmetrize);
   }
@@ -1933,26 +1933,26 @@ class ImplicitHFFFConvolution2 : public ThreadBase {
   ImplicitHFFFConvolution *yconvolve;
   bool allocated;
   Complex **u;
-public:  
+public:
   void initpointers(Complex **&u, unsigned int threads) {
     u=new Complex *[threads];
     unsigned int my1=my+1;
     for(unsigned int i=0; i < threads; ++i)
       u[i]=u1+i*my1;
   }
-    
+
   void deletepointers(Complex **&u) {
     delete [] u;
   }
-    
+
   void init() {
     xfftpad=new fft0bipad(mx,my,my+1,u2,threads);
-    
+
     yconvolve=new ImplicitHFFFConvolution(my,u1);
     yconvolve->Threads(1);
     initpointers(u,threads);
   }
-  
+
   // u1 is a temporary array of size (my+1)*threads.
   // u2 is a temporary array of size 2mx*(my+1).
   // threads is the number of threads to use in the outer subconvolution loop.
@@ -1963,7 +1963,7 @@ class ImplicitHFFFConvolution2 : public ThreadBase {
     u1(u1), u2(u2), allocated(false) {
     init();
   }
-  
+
   ImplicitHFFFConvolution2(unsigned int mx, unsigned int my,
                            unsigned int threads=fftw::maxthreads) :
     ThreadBase(threads), mx(mx), my(my),
@@ -1972,41 +1972,41 @@ class ImplicitHFFFConvolution2 : public ThreadBase {
     allocated(true) {
     init();
   }
-  
+
   ~ImplicitHFFFConvolution2() {
     deletepointers(u);
     delete yconvolve;
     delete xfftpad;
-    
+
     if(allocated) {
       utils::deleteAlign(u2);
       utils::deleteAlign(u1);
     }
   }
-  
+
   void convolve(Complex *f, Complex **u, Complex *u2, bool symmetrize=true) {
     unsigned int my1=my+1;
     unsigned int mu=2*mx*my1;
-    
+
     if(symmetrize)
       HermitianSymmetrizeX(mx,my1,mx,f);
     xfftpad->backwards(f,u2);
-    
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1)
       yconvolve->convolve(f+i,u[get_thread_num()]);
-    
+
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
-#endif    
+#endif
     for(unsigned int i=0; i < mu; i += my1)
       yconvolve->convolve(u2+i,u[get_thread_num()]);
 
     xfftpad->forwards(f,u2);
   }
-  
+
   void convolve(Complex *f, bool symmetrize=true) {
     convolve(f,u,u2,symmetrize);
   }
diff --git a/examples/example0.cc b/examples/example0.cc
index 06a3d304..9e273f15 100644
--- a/examples/example0.cc
+++ b/examples/example0.cc
@@ -10,33 +10,33 @@ using namespace fftwpp;
 int main()
 {
   fftw::maxthreads=get_max_threads();
-  
-  cout << "1D complex to complex in-place FFT, not using the Array class" 
+
+  cout << "1D complex to complex in-place FFT, not using the Array class"
        << endl;
 
 //  cout << endl << "FFTW requires memory alignment of " << fftw_alignment()
-//       << "."  << endl;   
-  
-  unsigned int n=4; 
+//       << "."  << endl;
+
+  unsigned int n=4;
   Complex *f=ComplexAlign(n);
-  
+
   fft1d Forward(n,-1);
   fft1d Backward(n,1);
-  
+
   for(unsigned int i=0; i < n; i++) f[i]=i;
 
   cout << "\ninput:" << endl;
-  for(unsigned int i=0; i < n; i++) cout << f[i] << endl;       
+  for(unsigned int i=0; i < n; i++) cout << f[i] << endl;
 
   Forward.fft(f);
 
   cout << "\noutput:" << endl;
-  for(unsigned int i=0; i < n; i++) cout << f[i] << endl;       
+  for(unsigned int i=0; i < n; i++) cout << f[i] << endl;
 
   Backward.fftNormalized(f);
 
   cout << "\ntransformed back:" << endl;
   for(unsigned int i=0; i < n; i++) cout << f[i] << endl;
-  
+
   deleteAlign(f);
 }
diff --git a/examples/example0m.cc b/examples/example0m.cc
index f2e65df1..a26d0df2 100644
--- a/examples/example0m.cc
+++ b/examples/example0m.cc
@@ -10,24 +10,24 @@ using namespace utils;
 using namespace fftwpp;
 
 int main()
-{  
+{
   cout << "Multiple 1D complex to complex in-place FFTs" << endl;
 
   fftw::maxthreads=4;
-  
+
   unsigned int M=10; // Number of FFTs performed
   unsigned int n=8; // Length of FFT
   unsigned int N=10; // Number data points for timing test.
 
   Complex *f=ComplexAlign(n*M);
-    
+
   mfft1d Forward(n,-1,M,M,1);
   mfft1d Backward(n,1,M,M,1);
 
   for(unsigned int i=0; i < n*M; i++) f[i]=i;
 
   unsigned int outlimit=100;
-  
+
   if(n*M < outlimit) {
     cout << endl << "input:" << endl;
     for(unsigned int i=0; i < n; i++) {
@@ -42,14 +42,14 @@ int main()
 
   // Timing test:
   seconds();
-  
+
   for(unsigned int j=0; j < N; ++j) {
     Forward.fft(f);
     Backward.fftNormalized(f);
   }
-  
+
   double time=seconds();
-  
+
   if(n*M < outlimit) {
     cout << endl << "back to input:" << endl;
     for(unsigned int i=0; i < n; i++) {
@@ -63,10 +63,10 @@ int main()
   cout << endl << "average seconds: " << time/N << endl;
 
   Complex sum=0.0;
-  for(unsigned int i=0; i < n*M; i++) 
+  for(unsigned int i=0; i < n*M; i++)
     sum += f[i];
 
   cout << endl << "sum of outputs (used for error-checking): " << sum << endl;
-  
+
   deleteAlign(f);
 }
diff --git a/examples/example0mr.cc b/examples/example0mr.cc
index cb7ad601..30109ac9 100644
--- a/examples/example0mr.cc
+++ b/examples/example0mr.cc
@@ -7,17 +7,17 @@ using namespace Array;
 using namespace fftwpp;
 
 int main()
-{  
+{
   cout << "Multiple 1D real-to-complex and complex-to-real FFTs" << endl;
 
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int nx=4, ny=4;
   unsigned int nyp=ny/2+1;
   size_t align=sizeof(Complex);
-  
+
   cout << "Out-of-place transforms:" << endl;
-    
+
   array2<double> f(nx,ny,align);
   array2<Complex> g(nx,nyp,align);
   size_t rstride=1;
@@ -25,7 +25,7 @@ int main()
   size_t rdist=ny;
   size_t cdist=nyp;
   unsigned int M=nx;
-  
+
   mrcfft1d Forward(ny, // length of transform
                    M,  // number of transforms
                    rstride,
@@ -44,18 +44,18 @@ int main()
                     f); // output array
 
   // Initialize data:
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
       f(i,j)=i+j;
-        
+
   cout << endl << "input:" << endl << f;
 
   Forward.fft(f,g);
-  
+
   cout << endl << "output:" << endl << g;
-  
+
   Backward.fftNormalized(g,f);
-  
+
   cout << endl << "back to input:" << endl << f;
 
 }
diff --git a/examples/example0r.cc b/examples/example0r.cc
index b7fe2e9c..f7f240b4 100644
--- a/examples/example0r.cc
+++ b/examples/example0r.cc
@@ -9,34 +9,34 @@ using namespace fftwpp;
 
 int main()
 {
-  cout << "1D real to complex out-of-place FFTs not using the Array class" 
+  cout << "1D real to complex out-of-place FFTs not using the Array class"
        << endl;
-   
+
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int n=4;
   unsigned int np=n/2+1;
   double *f=FFTWdouble(n);
   Complex *g=FFTWComplex(np);
-  
+
   rcfft1d Forward(n,f,g);
   crfft1d Backward(n,g,f);
-  
+
   for(unsigned int i=0; i < n; i++) f[i]=i;
-        
+
   cout << "\ninput (" << n << " doubles):" << endl;
   for(unsigned int i=0; i < n; i++) cout << f[i] << (i!=n-1 ? " " : "\n");
 
   Forward.fft(f,g);
-  
+
   cout << "\noutput (" << np << " complex):" << endl;
   for(unsigned int i=0; i < np; i++) cout << g[i] <<  (i!=np-1 ? " " : "\n");
-  
+
   Backward.fftNormalized(g,f);
-  
+
   cout << "\ntransformed back:" << endl;
   for(unsigned int i=0; i < n; i++) cout << f[i] <<  (i!=n-1 ? " " : "\n");
-  
+
   deleteAlign(g);
   deleteAlign(f);
 }
diff --git a/examples/example1.cc b/examples/example1.cc
index 60f24ff9..92a047e4 100644
--- a/examples/example1.cc
+++ b/examples/example1.cc
@@ -17,12 +17,12 @@ int main()
 
   unsigned int n=4;
   size_t align=sizeof(Complex);
-  
+
   array1<Complex> f(n,align);
-  
+
   fft1d Forward(-1,f);
   fft1d Backward(1,f);
-  
+
   for(unsigned int i=0; i < n; i++) f[i]=i;
 
   cout << "\ninput:\n" << f << endl;
@@ -30,7 +30,7 @@ int main()
   Forward.fft(f);
 
   cout << "\noutput:\n" << f << endl;
-  
+
   Backward.fftNormalized(f);
 
   cout << "\nback to input:\n" << f << endl;
diff --git a/examples/example1r.cc b/examples/example1r.cc
index e91d9901..acef971f 100644
--- a/examples/example1r.cc
+++ b/examples/example1r.cc
@@ -12,29 +12,29 @@ using namespace fftwpp;
 int main()
 {
   cout << "1D real to complex out-of-place FFT" << endl;
-  
+
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int n=4;
   unsigned int np=n/2+1;
   size_t align=sizeof(Complex);
-  
+
   array1<Complex> F(np,align);
   array1<double> f(n,align);               // For out-of-place transforms
 //  array1<double> f(2*np,(double *) F()); // For in-place transforms
-  
+
   rcfft1d Forward(n,f,F);
   crfft1d Backward(n,F,f);
-  
+
   for(unsigned int i=0; i < n; i++) f[i]=i;
 
   cout << endl << "input:" << endl << f << endl;
-        
+
   Forward.fft(f,F);
-  
+
   cout << endl << "output:" << endl << F << endl;
-  
+
   Backward.fftNormalized(F,f);
-  
+
   cout << endl << "back to input:" << endl << f << endl;
 }
diff --git a/examples/example2.cc b/examples/example2.cc
index 434c7c39..93e65823 100644
--- a/examples/example2.cc
+++ b/examples/example2.cc
@@ -16,22 +16,22 @@ int main()
 
   unsigned int nx=4, ny=4;
   size_t align=sizeof(Complex);
-  
+
   array2<Complex> f(nx,ny,align);
-  
+
   fft2d Forward(-1,f);
   fft2d Backward(1,f);
-  
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
+
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
       f(i,j)=Complex(i,j);
 
   cout << "\ninput:\n" << f;
-  
+
   Forward.fft(f);
-  
+
   cout << "\noutput:\n" << f;
-  
+
   Backward.fftNormalized(f);
 
   cout << "\nback to input:\n" << f;
diff --git a/examples/example2r.cc b/examples/example2r.cc
index 71f6f349..03e05770 100644
--- a/examples/example2r.cc
+++ b/examples/example2r.cc
@@ -14,29 +14,29 @@ int main()
   cout << "2D real to complex out-of-place FFT" << endl;
 
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int nx=4, ny=5;
   unsigned int nyp=ny/2+1;
   size_t align=sizeof(Complex);
-  
+
   array2<Complex> F(nx,nyp,align);
   array2<double> f(nx,ny,align);               // For out-of-place transforms
 //  array2<double> f(nx,2*nyp,(double *) F()); // For in-place transforms
-  
+
   rcfft2d Forward(nx,ny,f,F);
   crfft2d Backward(nx,ny,F,f);
-  
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
+
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
       f(i,j)=i+j;
-        
+
   cout << endl << "input:" << endl << f;
 
   Forward.fft0(f,F);
-  
+
   cout << endl << "output:" << endl << F;
-  
+
   Backward.fft0Normalized(F,f);
-  
+
   cout << endl << "back to input:" << endl << f;
 }
diff --git a/examples/example3.cc b/examples/example3.cc
index cb06c9ce..0632be7e 100644
--- a/examples/example3.cc
+++ b/examples/example3.cc
@@ -9,32 +9,32 @@ using namespace utils;
 using namespace Array;
 using namespace fftwpp;
 
-int main() 
+int main()
 {
   cout << "3D complex to complex in-place FFT" << endl;
 
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int nx=4, ny=4, nz=4;
   size_t align=sizeof(Complex);
-  
+
   array3<Complex> f(nx,ny,nz,align);
-  
+
   fft3d Forward(-1,f);
   fft3d Backward(1,f);
-  
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
-      for(unsigned int k=0; k < nz; k++) 
+
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
+      for(unsigned int k=0; k < nz; k++)
         f(i,j,k)=Complex(10*k+i,j);
-        
+
   cout << "\ninput:\n" << f;
-  
+
   Forward.fft(f);
-  
+
   cout << "\noutput:\n" << f;
-  
+
   Backward.fftNormalized(f);
-  
+
   cout << "\nback to input:\n" << f;
 }
diff --git a/examples/example3r.cc b/examples/example3r.cc
index 69d48942..0efa8f19 100644
--- a/examples/example3r.cc
+++ b/examples/example3r.cc
@@ -14,30 +14,30 @@ int main()
   cout << "3D real to complex out-of-place FFT" << endl;
 
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int nx=4, ny=5, nz=6;
   unsigned int nzp=nz/2+1;
   size_t align=sizeof(Complex);
-  
+
   array3<Complex> F(nx,ny,nzp,align);
   array3<double> f(nx,ny,nz,align);              // For out-of-place transforms
 // array3<double> f(nx,ny,2*nzp,(double *) F()); // For in-place transforms
 
   rcfft3d Forward(nx,ny,nz,f,F);
   crfft3d Backward(nx,ny,nz,F,f);
-  
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
-      for(unsigned int k=0; k < nz; k++) 
+
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
+      for(unsigned int k=0; k < nz; k++)
         f(i,j,k)=i+j+k;
-        
+
   cout << "\ninput:\n" << f;
-  
+
   Forward.fft(f,F);
-  
+
   cout << "\noutput:\n" << F;
-  
+
   Backward.fftNormalized(F,f);
-  
+
   cout << "\nback to input:\n" << f;
 }
diff --git a/examples/examplecconv.cc b/examples/examplecconv.cc
index ac696a05..38e31d74 100644
--- a/examples/examplecconv.cc
+++ b/examples/examplecconv.cc
@@ -10,7 +10,7 @@ using namespace utils;
 using namespace fftwpp;
 
 
-void init(Complex *f, Complex *g, unsigned int m) 
+void init(Complex *f, Complex *g, unsigned int m)
 {
   for(unsigned int k=0; k < m; k++) {
     f[k]=Complex(k,k+1);
@@ -24,28 +24,28 @@ int main(int argc, char* argv[])
 
   // size of problem
   unsigned int m=8;
-  
+
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   // allocate arrays:
   Complex *f=ComplexAlign(m);
   Complex *g=ComplexAlign(m);
-  
+
   cout << "1d non-centered complex convolution:" << endl;
   init(f,g,m);
   cout << "\ninput:\nf\tg" << endl;
-  for(unsigned int i=0; i < m; i++) 
+  for(unsigned int i=0; i < m; i++)
     cout << f[i] << "\t" << g[i] << endl;
-  
+
   ImplicitConvolution C(m);
 
   C.convolve(f,g);
 
   cout << "\noutput:" << endl;
   for(unsigned int i=0; i < m; i++) cout << f[i] << endl;
-  
+
   deleteAlign(g);
   deleteAlign(f);
 
diff --git a/examples/examplecconv2.cc b/examples/examplecconv2.cc
index e855fbb9..de74efcb 100644
--- a/examples/examplecconv2.cc
+++ b/examples/examplecconv2.cc
@@ -14,7 +14,7 @@ using namespace fftwpp;
 unsigned int mx=4;
 unsigned int my=4;
 
-inline void init(array2<Complex>& f, array2<Complex>& g) 
+inline void init(array2<Complex>& f, array2<Complex>& g)
 {
   for(unsigned int i=0; i < mx; ++i) {
     for(unsigned int j=0; j < my; j++) {
@@ -30,7 +30,7 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
+#endif
 
   cout << "2D non-centered complex convolution:" << endl;
 
diff --git a/examples/examplecconv3.cc b/examples/examplecconv3.cc
index 56c766fc..779da8cb 100644
--- a/examples/examplecconv3.cc
+++ b/examples/examplecconv3.cc
@@ -15,7 +15,7 @@ unsigned int mx=4;
 unsigned int my=4;
 unsigned int mz=4;
 
-inline void init(array3<Complex>& f, array3<Complex>& g, unsigned int M=1) 
+inline void init(array3<Complex>& f, array3<Complex>& g, unsigned int M=1)
 {
   double factor=1.0/sqrt((double) M);
   for(unsigned int s=0; s < M; ++s) {
@@ -40,7 +40,7 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
+#endif
 
   cout << "3D non-centered complex convolution:" << endl;
 
diff --git a/examples/examplecconvfull.cc b/examples/examplecconvfull.cc
index e3019bf0..e1a7a1f1 100644
--- a/examples/examplecconvfull.cc
+++ b/examples/examplecconvfull.cc
@@ -11,7 +11,7 @@ using namespace fftwpp;
 
 double f0[]={0,1,2,3,4};
 
-void init(Complex *f, Complex *g, unsigned int m) 
+void init(Complex *f, Complex *g, unsigned int m)
 {
   for(unsigned int k=0; k < m; k++) {
     f[k]=f0[k];
@@ -26,9 +26,9 @@ void multbinaryfull(Complex **F, unsigned int m,
 {
   Complex* F0=F[0];
   Complex* F1=F[1];
-  
+
   double sign=r == 0 ? 1 : -1;
-  
+
   PARALLEL(
     for(unsigned int j=0; j < m; ++j) {
       Complex *F0j=F0+j;
@@ -46,30 +46,30 @@ int main(int argc, char* argv[])
 
   // size of problem
   unsigned int m=sizeof(f0)/sizeof(double);
-  
+
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   // allocate arrays:
   Complex *F[2];
   Complex *f=F[0]=ComplexAlign(2*m);
   Complex *g=F[1]=f+m;
-  
+
   ImplicitConvolution C(m,2,2);
 
   cout << "1d non-centered complex convolution:" << endl;
   init(f,g,m);
   cout << "\ninput:\nf\tg" << endl;
-  for(unsigned int i=0; i < m; i++) 
+  for(unsigned int i=0; i < m; i++)
     cout << f[i] << "\t" << g[i] << endl;
-  
+
   C.convolve(F,multbinaryfull);
 
   cout << "\noutput:" << endl;
   for(unsigned int i=0; i < m; i++) cout << f[i] << endl;
   for(unsigned int i=0; i < m-1; i++) cout << g[i] << endl;
-  
+
   deleteAlign(f);
 
   return 0;
diff --git a/examples/examplecconvsame.cc b/examples/examplecconvsame.cc
index ddb2bfb7..a9461db2 100644
--- a/examples/examplecconvsame.cc
+++ b/examples/examplecconvsame.cc
@@ -13,7 +13,7 @@ using namespace fftwpp;
 
 double f0[]={0,1,2,3,4};
 
-void init(Complex *f, Complex *g, unsigned int m) 
+void init(Complex *f, Complex *g, unsigned int m)
 {
   for(unsigned int k=0; k < m; k++) {
     f[k]=f0[k];
@@ -31,7 +31,7 @@ class ZetaTable {
     s=BuildZeta(twopi*c/(2*m),2*m,ZetaH,ZetaL);
   }
 };
-  
+
 static void multbinarysame(Complex **F, unsigned int m,
                            const unsigned int indexsize,
                            const unsigned int *index,
@@ -42,7 +42,7 @@ static void multbinarysame(Complex **F, unsigned int m,
 
   Complex* F0=F[0];
   Complex* F1=F[1];
-  
+
   unsigned int c=m/2;
   if(2*c == m) {
     if(r == 0) {
@@ -65,7 +65,7 @@ static void multbinarysame(Complex **F, unsigned int m,
           sign=-sign;
         }
         );
-      
+
     }
   } else {
     if(m != lastm) {
@@ -77,11 +77,11 @@ static void multbinarysame(Complex **F, unsigned int m,
         list[m]=zeta;
       } else zeta=p->second;
     }
-  
+
     Complex *ZetaH=zeta.ZetaH;
     Complex *ZetaL=zeta.ZetaL;
     unsigned int s=zeta.s;
-  
+
     PARALLEL(
       for(unsigned int j=0; j < m; ++j) {
         Complex *F0j=F0+j;
@@ -103,29 +103,29 @@ int main(int argc, char* argv[])
 
   // size of problem
   unsigned int m=sizeof(f0)/sizeof(double);
-  
+
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   // allocate arrays:
   Complex *F[2];
   Complex *f=F[0]=ComplexAlign(2*m);
   Complex *g=F[1]=f+m;
-  
+
   ImplicitConvolution C(m);
 
   cout << "1d non-centered complex convolution:" << endl;
   init(f,g,m);
   cout << "\ninput:\nf\tg" << endl;
-  for(unsigned int i=0; i < m; i++) 
+  for(unsigned int i=0; i < m; i++)
     cout << f[i] << "\t" << g[i] << endl;
-  
+
   C.convolve(F,multbinarysame);
 
   cout << "\noutput:" << endl;
   for(unsigned int i=0; i < m; i++) cout << f[i] << endl;
-  
+
   deleteAlign(f);
 
   return 0;
diff --git a/examples/exampleconv.cc b/examples/exampleconv.cc
index 4277c851..e490adb9 100644
--- a/examples/exampleconv.cc
+++ b/examples/exampleconv.cc
@@ -10,7 +10,7 @@ using namespace utils;
 using namespace Array;
 using namespace fftwpp;
 
-inline void init(Complex *f, Complex *g, unsigned int m) 
+inline void init(Complex *f, Complex *g, unsigned int m)
 {
   for(unsigned int k=0; k < m; k++) {
     f[k]=Complex(k,k+1);
@@ -27,8 +27,8 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   // 1d centered Hermitian-symmetric complex convolution
   cout << "1D centered Hermitian-symmetric convolution:" << endl;
 
@@ -38,15 +38,15 @@ int main(int argc, char* argv[])
 
   init(f,g,m);
   cout << "\ninput:\nf\tg" << endl;
-  for(unsigned int i=0; i < m; i++) 
+  for(unsigned int i=0; i < m; i++)
     cout << f[i] << "\t" << g[i] << endl;
-  
+
   ImplicitHConvolution C(m);
   C.convolve(f,g);
-  
+
   cout << "\noutput:" << endl;
   for(unsigned int i=0; i < m; i++) cout << f[i] << endl;
-    
+
   deleteAlign(g);
   deleteAlign(f);
 
diff --git a/examples/exampleconv2.cc b/examples/exampleconv2.cc
index 2e1d1514..79b92703 100644
--- a/examples/exampleconv2.cc
+++ b/examples/exampleconv2.cc
@@ -16,7 +16,7 @@ unsigned int m=8;
 unsigned int mx=4;
 unsigned int my=4;
 
-inline void init(array2<Complex>& f, array2<Complex>& g, unsigned int M=1) 
+inline void init(array2<Complex>& f, array2<Complex>& g, unsigned int M=1)
 {
   unsigned int stop=2*mx-1;
   unsigned int stopoffset=stop;
@@ -41,8 +41,8 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   cout << "2D centered Hermitian-symmetric convolution:" << endl;
 
   size_t align=sizeof(Complex);
@@ -52,7 +52,7 @@ int main(int argc, char* argv[])
   cout << "\ninput:" << endl;
   cout << "f:" << endl << f;
   cout << "g:" << endl << g;
-  
+
   /*
     cout << "input after symmetrization (done automatically):" << endl;
     HermitianSymmetrizeX(mx,my,mx-1,f);
@@ -60,7 +60,7 @@ int main(int argc, char* argv[])
     cout << "f:" << endl << f << endl;
     cout << "g:" << endl << g << endl;
   */
-  
+
   bool symmetrize=true;
   ImplicitHConvolution2 C(mx,my);
   C.convolve(f,g,symmetrize);
diff --git a/examples/exampleconv3.cc b/examples/exampleconv3.cc
index 09e0f1ae..6882bee0 100644
--- a/examples/exampleconv3.cc
+++ b/examples/exampleconv3.cc
@@ -15,7 +15,7 @@ unsigned int mx=4;
 unsigned int my=4;
 unsigned int mz=4;
 
-inline void init(array3<Complex>& f, array3<Complex>& g, unsigned int M=1) 
+inline void init(array3<Complex>& f, array3<Complex>& g, unsigned int M=1)
 {
   unsigned int xstop=2*mx-1;
   unsigned int ystop=2*my-1;
@@ -42,8 +42,8 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
+#endif
+
   cout << "3D centered Hermitian-symmetric convolution:" << endl;
 
   size_t align=sizeof(Complex);
@@ -54,7 +54,7 @@ int main(int argc, char* argv[])
   cout << "\ninput:" << endl;
   cout << "f:" << endl << f;
   cout << "g:" << endl << g;
-  
+
   /*
     cout << "input after symmetrization (done automatically):" << endl;
     HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,f);
@@ -62,7 +62,7 @@ int main(int argc, char* argv[])
     cout << "f:" << endl << f << endl;
     cout << "g:" << endl << g << endl;
   */
-  
+
   bool symmetrize=true;
   ImplicitHConvolution3 C(mx,my,mz);
   C.convolve(f,g,symmetrize);
diff --git a/examples/exampletranspose.cc b/examples/exampletranspose.cc
index 22e984be..0951ffe4 100644
--- a/examples/exampletranspose.cc
+++ b/examples/exampletranspose.cc
@@ -16,23 +16,23 @@ int main()
 
   unsigned int nx=4, ny=4;
   size_t align=sizeof(Complex);
-  
+
   array2<Complex> f(nx,ny,align);
   array2<Complex> ft(ny,nx,align);
-  
+
   Transpose T(nx,ny,1,f(),ft(),fftw::maxthreads);
   Transpose Tinv(ny,nx,1,ft(),f(),fftw::maxthreads);
-  
-  for(unsigned int i=0; i < nx; i++) 
-    for(unsigned int j=0; j < ny; j++) 
+
+  for(unsigned int i=0; i < nx; i++)
+    for(unsigned int j=0; j < ny; j++)
       f(i,j)=Complex(i,j);
 
   cout << "\ninput:\n" << f;
-  
+
   T.transpose(f(),ft());
-    
+
   cout << "\noutput:\n" << ft;
-  
+
   Tinv.transpose(ft(),f());
 
   cout << "\nback to input:\n" << f;
diff --git a/fftw++.h b/fftw++.h
index 01e1f551..467387d3 100644
--- a/fftw++.h
+++ b/fftw++.h
@@ -2,7 +2,7 @@
    Copyright (C) 2004-16
    John C. Bowman, University of Alberta
    Malcolm Roberts, University of Strasbourg
-                         
+
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
@@ -39,22 +39,22 @@
 #include <omp.h>
 #endif
 
-inline int get_thread_num() 
+inline int get_thread_num()
 {
 #ifdef FFTWPP_SINGLE_THREAD
   return 0;
 #else
   return omp_get_thread_num();
-#endif  
+#endif
 }
 
-inline int get_max_threads() 
+inline int get_max_threads()
 {
 #ifdef FFTWPP_SINGLE_THREAD
   return 1;
 #else
   return omp_get_max_threads();
-#endif  
+#endif
 }
 
 #ifndef FFTWPP_SINGLE_THREAD
@@ -69,7 +69,7 @@ inline int get_max_threads()
 #define PARALLEL(code)                          \
   {                                             \
     code                                        \
-  }
+      }
 #endif
 
 #ifndef __Complex_h__
@@ -117,12 +117,12 @@ class ThreadBase
 protected:
   unsigned int threads;
   unsigned int innerthreads;
-public:  
+public:
   ThreadBase();
   ThreadBase(unsigned int threads) : threads(threads) {}
   void Threads(unsigned int nthreads) {threads=nthreads;}
   unsigned int Threads() {return threads;}
-  
+
   void multithread(unsigned int nx) {
     if(nx >= threads) {
       innerthreads=1;
@@ -137,17 +137,17 @@ inline unsigned int realsize(unsigned int n, Complex *in, Complex *out=NULL)
 {
   return (!out || in == out) ? 2*(n/2+1) : n;
 }
-  
+
 inline unsigned int realsize(unsigned int n, Complex *in, double *out)
 {
- return realsize(n,in,(Complex *) out);
+  return realsize(n,in,(Complex *) out);
 }
-  
+
 inline unsigned int realsize(unsigned int n, double *in, Complex *out)
 {
- return realsize(n,(Complex *) in,out);
+  return realsize(n,(Complex *) in,out);
 }
-  
+
 // Base clase for fft routines
 //
 class fftw : public ThreadBase {
@@ -159,24 +159,24 @@ class fftw : public ThreadBase {
 
   fftw_plan plan;
   bool inplace;
-  
+
   unsigned int Dist(unsigned int n, size_t stride, size_t dist) {
     return dist ? dist : ((stride == 1) ? n : 1);
   }
-  
+
   static const double twopi;
-  
+
 public:
   static unsigned int effort;
   static unsigned int maxthreads;
   static double testseconds;
   static const char *WisdomName;
   static fftw_plan (*planner)(fftw *f, Complex *in, Complex *out);
-  
+
   virtual unsigned int Threads() {return threads;}
-  
+
   static const char *oddshift;
-  
+
   // Inplace shift of Fourier origin to (nx/2,0) for even nx.
   static void Shift(Complex *data, unsigned int nx, unsigned int ny,
                     unsigned int threads) {
@@ -260,41 +260,41 @@ class fftw : public ThreadBase {
       exit(1);
     }
   }
-  
+
   fftw() : plan(NULL) {}
   fftw(unsigned int doubles, int sign, unsigned int threads,
        unsigned int n=0) :
-    doubles(doubles), sign(sign), threads(threads), 
+    doubles(doubles), sign(sign), threads(threads),
     norm(1.0/(n ? n : doubles/2)), plan(NULL) {
 #ifndef FFTWPP_SINGLE_THREAD
     fftw_init_threads();
-#endif      
+#endif
   }
-  
+
   virtual ~fftw() {
     if(plan) fftw_destroy_plan(plan);
   }
-  
+
   virtual fftw_plan Plan(Complex *in, Complex *out) {return NULL;};
-  
+
   inline void CheckAlign(Complex *p, const char *s) {
     if((size_t) p % sizeof(Complex) == 0) return;
-    std::cerr << "WARNING: " << s << " array is not " << sizeof(Complex) 
+    std::cerr << "WARNING: " << s << " array is not " << sizeof(Complex)
               << "-byte aligned: address " << p << std::endl;
   }
-  
+
   void noplan() {
     std::cerr << "Unable to construct FFTW plan" << std::endl;
     exit(1);
   }
-  
+
   static void planThreads(unsigned int threads) {
 #ifndef FFTWPP_SINGLE_THREAD
     omp_set_num_threads(threads);
     fftw_plan_with_nthreads(threads);
-#endif    
+#endif
   }
-  
+
   threaddata time(fftw_plan plan1, fftw_plan planT, Complex *in, Complex *out,
                   unsigned int Threads) {
     utils::statistics S,ST;
@@ -339,30 +339,30 @@ class fftw : public ThreadBase {
     }
     return threaddata(threads,S.mean(),S.stdev());
   }
-  
+
   virtual threaddata lookup(bool inplace, unsigned int threads) {
     return threaddata();
   }
   virtual void store(bool inplace, const threaddata& data) {}
-  
+
   inline Complex *CheckAlign(Complex *in, Complex *out, bool constructor=true)
   {
-#ifndef NO_CHECK_ALIGN    
+#ifndef NO_CHECK_ALIGN
     CheckAlign(in,constructor ? "constructor input" : "input");
     if(out) CheckAlign(out,constructor ? "constructor output" : "output");
     else out=in;
 #else
     if(!out) out=in;
-#endif    
+#endif
     return out;
   }
-  
+
   threaddata Setup(Complex *in, Complex *out=NULL) {
     bool alloc=!in;
     if(alloc) in=utils::ComplexAlign((doubles+1)/2);
     out=CheckAlign(in,out);
     inplace=(out==in);
-    
+
     threaddata data;
     unsigned int Threads=threads;
     if(threads > 1) data=lookup(inplace,threads);
@@ -370,13 +370,13 @@ class fftw : public ThreadBase {
     planThreads(threads);
     plan=(*planner)(this,in,out);
     if(!plan) noplan();
-    
+
     fftw_plan planT;
     if(fftw::maxthreads > 1) {
       threads=Threads;
       planThreads(threads);
       planT=(*planner)(this,in,out);
-    
+
       if(data.threads == 0) {
         if(planT)
           data=time(plan,planT,in,out,threads);
@@ -384,11 +384,11 @@ class fftw : public ThreadBase {
         store(inplace,threaddata(threads,data.mean,data.stdev));
       }
     }
-    
+
     if(alloc) Array::deleteAlign(in,(doubles+1)/2);
     return data;
   }
-  
+
   threaddata Setup(Complex *in, double *out) {
     return Setup(in,(Complex *) out);
   }
@@ -396,11 +396,11 @@ class fftw : public ThreadBase {
   threaddata Setup(double *in, Complex *out=NULL) {
     return Setup((Complex *) in,out);
   }
-  
+
   virtual void Execute(Complex *in, Complex *out, bool=false) {
     fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out);
   }
-    
+
   Complex *Setout(Complex *in, Complex *out) {
     out=CheckAlign(in,out,false);
     if(inplace ^ (out == in)) {
@@ -409,33 +409,33 @@ class fftw : public ThreadBase {
     }
     return out;
   }
-  
+
   void fft(Complex *in, Complex *out=NULL) {
     out=Setout(in,out);
     Execute(in,out);
   }
-    
+
   void fft(double *in, Complex *out=NULL) {
     fft((Complex *) in,out);
   }
-  
+
   void fft(Complex *in, double *out) {
     fft(in,(Complex *) out);
   }
-  
+
   void fft0(Complex *in, Complex *out=NULL) {
     out=Setout(in,out);
     Execute(in,out,true);
   }
-    
+
   void fft0(double *in, Complex *out=NULL) {
     fft0((Complex *) in,out);
   }
-  
+
   void fft0(Complex *in, double *out) {
     fft0(in,(Complex *) out);
   }
-  
+
   void Normalize(Complex *out) {
     unsigned int stop=doubles/2;
 #ifndef FFTWPP_SINGLE_THREAD
@@ -450,29 +450,29 @@ class fftw : public ThreadBase {
 #endif
     for(unsigned int i=0; i < doubles; i++) out[i] *= norm;
   }
-  
-  virtual void fftNormalized(Complex *in, Complex *out=NULL, bool shift=false) 
+
+  virtual void fftNormalized(Complex *in, Complex *out=NULL, bool shift=false)
   {
     out=Setout(in,out);
     Execute(in,out,shift);
     Normalize(out);
   }
-  
+
   virtual void fftNormalized(Complex *in, double *out, bool shift=false) {
     out=(double *) Setout(in,(Complex *) out);
     Execute(in,(Complex *) out,shift);
     Normalize(out);
   }
-  
+
   virtual void fftNormalized(double *in, Complex *out, bool shift=false) {
     fftNormalized((Complex *) in,out,shift);
   }
-  
+
   template<class I, class O>
   void fft0Normalized(I in, O out) {
     fftNormalized(in,out,true);
   }
-  
+
   template<class O>
   void Normalize(unsigned int nx, unsigned int M, size_t ostride,
                  size_t odist, O *out) {
@@ -488,7 +488,7 @@ class fftw : public ThreadBase {
       }
     }
   }
-  
+
   template<class I, class O>
   void fftNormalized(unsigned int nx, unsigned int M, size_t ostride,
                      size_t odist, I *in, O *out=NULL, bool shift=false) {
@@ -508,7 +508,7 @@ class Transpose {
             T *in, T *out=NULL, unsigned int threads=fftw::maxthreads) {
     unsigned int size=sizeof(T);
     if(size % sizeof(double) != 0) {
-      std::cerr << "ERROR: Transpose is not implemented for type of size " 
+      std::cerr << "ERROR: Transpose is not implemented for type of size "
                 << size;
       exit(1);
     }
@@ -520,14 +520,14 @@ class Transpose {
     if(!out) out=in;
     inplace=(out==in);
     fftw::planThreads(threads);
-    
+
     fftw_iodim dims[3];
 
-    dims[0].n=rows; 
+    dims[0].n=rows;
     dims[0].is=cols*length;
     dims[0].os=length;
-    
-    dims[1].n=cols; 
+
+    dims[1].n=cols;
     dims[1].is=length;
     dims[1].os=rows*length;
 
@@ -543,7 +543,7 @@ class Transpose {
   ~Transpose() {
     if(plan) fftw_destroy_plan(plan);
   }
-  
+
   template<class T>
   void transpose(T *in, T *out=NULL) {
     if(!plan) return;
@@ -565,7 +565,7 @@ class Threadtable {
     typename Table::iterator p=table.find(key);
     return p == table.end() ? threaddata() : p->second;
   }
-  
+
   void Store(Table& threadtable, T key, const threaddata& data) {
     threadtable[key]=data;
   }
@@ -575,10 +575,10 @@ struct keytype1 {
   unsigned int nx;
   unsigned int threads;
   bool inplace;
-  keytype1(unsigned int nx, unsigned int threads, bool inplace) : 
+  keytype1(unsigned int nx, unsigned int threads, bool inplace) :
     nx(nx), threads(threads), inplace(inplace) {}
 };
-  
+
 struct keyless1 {
   bool operator()(const keytype1& a, const keytype1& b) const {
     return a.nx < b.nx || (a.nx == b.nx &&
@@ -593,10 +593,10 @@ struct keytype2 {
   unsigned int threads;
   bool inplace;
   keytype2(unsigned int nx, unsigned int ny, unsigned int threads,
-           bool inplace) : 
+           bool inplace) :
     nx(nx), ny(ny), threads(threads), inplace(inplace) {}
 };
-  
+
 struct keyless2 {
   bool operator()(const keytype2& a, const keytype2& b) const {
     return a.nx < b.nx || (a.nx == b.nx &&
@@ -614,13 +614,13 @@ struct keytype3 {
   unsigned int threads;
   bool inplace;
   keytype3(unsigned int nx, unsigned int ny, unsigned int nz,
-           unsigned int threads, bool inplace) : 
+           unsigned int threads, bool inplace) :
     nx(nx), ny(ny), nz(nz), threads(threads), inplace(inplace) {}
 };
-  
+
 struct keyless3 {
   bool operator()(const keytype3& a, const keytype3& b) const {
-    return a.nx < b.nx || (a.nx == b.nx && 
+    return a.nx < b.nx || (a.nx == b.nx &&
                            (a.ny < b.ny || (a.ny == b.ny &&
                                             (a.nz < b.nz ||
                                              (a.nz == b.nz &&
@@ -634,7 +634,7 @@ struct keyless3 {
 // Before calling fft(), the arrays in and out (which may coincide) must be
 // allocated as Complex[n].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   fft1d Forward(n,-1,in,out);
 //   Forward.fft(in,out);
@@ -656,31 +656,31 @@ struct keyless3 {
 class fft1d : public fftw, public Threadtable<keytype1,keyless1> {
   unsigned int nx;
   static Table threadtable;
-public:  
+public:
   fft1d(unsigned int nx, int sign, Complex *in=NULL, Complex *out=NULL,
         unsigned int threads=maxthreads)
-    : fftw(2*nx,sign,threads), nx(nx) {Setup(in,out);} 
-  
+    : fftw(2*nx,sign,threads), nx(nx) {Setup(in,out);}
+
 #ifdef __Array_h__
   fft1d(int sign, const Array::array1<Complex>& in,
         const Array::array1<Complex>& out=Array::NULL1,
-        unsigned int threads=maxthreads) 
-    : fftw(2*in.Nx(),sign,threads), nx(in.Nx()) {Setup(in,out);} 
-#endif  
-  
+        unsigned int threads=maxthreads)
+    : fftw(2*in.Nx(),sign,threads), nx(in.Nx()) {Setup(in,out);}
+#endif
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return this->Lookup(threadtable,keytype1(nx,threads,inplace));
   }
   void store(bool inplace, const threaddata& data) {
     this->Store(threadtable,keytype1(nx,data.threads,inplace),data);
   }
-  
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_1d(nx,(fftw_complex *) in,(fftw_complex *) out,
                             sign,effort);
   }
 };
-  
+
 template<class I, class O>
 class fftwblock : public virtual fftw {
 public:
@@ -699,19 +699,19 @@ class fftwblock : public virtual fftw {
     T=1;
     Q=M;
     R=0;
-    
+
     threaddata S1=Setup(in,out);
     fftw_plan planT1=plan;
-    
+
     if(fftw::maxthreads > 1) {
       if(Threads > 1) {
         T=std::min(M,Threads);
         Q=T > 0 ? M/T : 0;
         R=M-Q*T;
-    
+
         threads=Threads;
         threaddata ST=Setup(in,out);
-        
+
         if(R > 0 && threads == 1 && plan1 != plan2) {
           fftw_destroy_plan(plan2);
           plan2=plan1;
@@ -735,23 +735,23 @@ class fftwblock : public virtual fftw {
       } else
         Setup(in,out); // Synchronize wisdom
     }
-  }    
-  
+  }
+
   fftw_plan Plan(int Q, fftw_complex *in, fftw_complex *out) {
     return fftw_plan_many_dft(1,&nx,Q,in,NULL,istride,idist,
                               out,NULL,ostride,odist,sign,effort);
   }
-  
+
   fftw_plan Plan(int Q, double *in, fftw_complex *out) {
     return fftw_plan_many_dft_r2c(1,&nx,Q,in,NULL,istride,idist,
                                   out,NULL,ostride,odist,effort);
   }
-  
+
   fftw_plan Plan(int Q, fftw_complex *in, double *out) {
     return fftw_plan_many_dft_c2r(1,&nx,Q,in,NULL,istride,idist,
                                   out,NULL,ostride,odist,effort);
   }
-  
+
   fftw_plan Plan(Complex *in, Complex *out) {
     if(R > 0) {
       plan2=Plan(Q+1,(I *) in,(O *) out);
@@ -760,11 +760,11 @@ class fftwblock : public virtual fftw {
     }
     return Plan(Q,(I *) in,(O *) out);
   }
-    
+
   void Execute(fftw_plan plan, fftw_complex *in, fftw_complex *out) {
     fftw_execute_dft(plan,in,out);
   }
-  
+
   void Execute(fftw_plan plan, double *in, fftw_complex *out) {
     fftw_execute_dft_r2c(plan,in,out);
   }
@@ -792,20 +792,20 @@ class fftwblock : public virtual fftw {
       }
     }
   }
-  
+
   unsigned int Threads() {return std::max(T,threads);}
-  
+
   ~fftwblock() {
     if(plan2) fftw_destroy_plan(plan2);
   }
 };
-  
+
 // Compute the complex Fourier transform of M complex vectors, each of
 // length n.
 // Before calling fft(), the arrays in and out (which may coincide) must be
 // allocated as Complex[M*n].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   mfft1d Forward(n,-1,M,stride,dist,in,out);
 //   Forward.fft(in,out);
@@ -823,14 +823,14 @@ class fftwblock : public virtual fftw {
 class mfft1d : public fftwblock<fftw_complex,fftw_complex>,
                public Threadtable<keytype3,keyless3> {
   static Table threadtable;
-public:  
+public:
   mfft1d(unsigned int nx, int sign, unsigned int M=1, size_t stride=1,
          size_t dist=0, Complex *in=NULL, Complex *out=NULL,
          unsigned int threads=maxthreads) :
     fftw(2*((nx-1)*stride+(M-1)*Dist(nx,stride,dist)+1),sign,threads,nx),
     fftwblock<fftw_complex,fftw_complex>
-    (nx,M,stride,stride,dist,dist,in,out,threads) {} 
-  
+    (nx,M,stride,stride,dist,dist,in,out,threads) {}
+
   mfft1d(unsigned int nx, int sign, unsigned int M,
          size_t istride, size_t ostride, size_t idist, size_t odist,
          Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads):
@@ -838,8 +838,8 @@ class mfft1d : public fftwblock<fftw_complex,fftw_complex>,
                   2*((nx-1)*ostride+(M-1)*Dist(nx,ostride,odist)+1)),sign,
          threads, nx),
     fftwblock<fftw_complex,fftw_complex>(nx,M,istride,ostride,idist,odist,in,
-                                         out,threads) {} 
-  
+                                         out,threads) {}
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace));
   }
@@ -847,13 +847,13 @@ class mfft1d : public fftwblock<fftw_complex,fftw_complex>,
     Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data);
   }
 };
-  
+
 // Compute the complex Fourier transform of n real values, using phase sign -1.
 // Before calling fft(), the array in must be allocated as double[n] and
 // the array out must be allocated as Complex[n/2+1]. The arrays in and out
 // may coincide, allocated as Complex[n/2+1].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   rcfft1d Forward(n,in,out);
 //   Forward.fft(in,out);
@@ -862,7 +862,7 @@ class mfft1d : public fftwblock<fftw_complex,fftw_complex>,
 //
 //   rcfft1d Forward(n);
 //   Forward.fft(out);
-// 
+//
 // Notes:
 //   in contains the n real values stored as a Complex array;
 //   out contains the first n/2+1 Complex Fourier values.
@@ -870,36 +870,36 @@ class mfft1d : public fftwblock<fftw_complex,fftw_complex>,
 class rcfft1d : public fftw, public Threadtable<keytype1,keyless1> {
   unsigned int nx;
   static Table threadtable;
-public:  
-  rcfft1d(unsigned int nx, Complex *out=NULL, unsigned int threads=maxthreads) 
+public:
+  rcfft1d(unsigned int nx, Complex *out=NULL, unsigned int threads=maxthreads)
     : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(out,(double*) NULL);}
-  
+
   rcfft1d(unsigned int nx, double *in, Complex *out=NULL,
-          unsigned int threads=maxthreads)  
+          unsigned int threads=maxthreads)
     : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(in,out);}
-  
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return Lookup(threadtable,keytype1(nx,threads,inplace));
   }
   void store(bool inplace, const threaddata& data) {
     Store(threadtable,keytype1(nx,data.threads,inplace),data);
   }
-  
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_r2c_1d(nx,(double *) in,(fftw_complex *) out, effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool=false) {
     fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out);
   }
 };
-  
+
 // Compute the real inverse Fourier transform of the n/2+1 Complex values
 // corresponding to the non-negative part of the frequency spectrum, using
 // phase sign +1.
 // Before calling fft(), the array in must be allocated as Complex[n/2+1]
 // and the array out must be allocated as double[n]. The arrays in and out
-// may coincide, allocated as Complex[n/2+1]. 
+// may coincide, allocated as Complex[n/2+1].
 //
 // Out-of-place usage (input destroyed):
 //
@@ -910,7 +910,7 @@ class rcfft1d : public fftw, public Threadtable<keytype1,keyless1> {
 //
 //   crfft1d Backward(n);
 //   Backward.fft(in);
-// 
+//
 // Notes:
 //   in contains the first n/2+1 Complex Fourier values.
 //   out contains the n real values stored as a Complex array;
@@ -918,25 +918,25 @@ class rcfft1d : public fftw, public Threadtable<keytype1,keyless1> {
 class crfft1d : public fftw, public Threadtable<keytype1,keyless1> {
   unsigned int nx;
   static Table threadtable;
-public:  
-  crfft1d(unsigned int nx, double *out=NULL, unsigned int threads=maxthreads) 
-    : fftw(2*(nx/2+1),1,threads,nx), nx(nx) {Setup(out);} 
-  
-  crfft1d(unsigned int nx, Complex *in, double *out=NULL, 
+public:
+  crfft1d(unsigned int nx, double *out=NULL, unsigned int threads=maxthreads)
+    : fftw(2*(nx/2+1),1,threads,nx), nx(nx) {Setup(out);}
+
+  crfft1d(unsigned int nx, Complex *in, double *out=NULL,
           unsigned int threads=maxthreads)
-    : fftw(realsize(nx,in,out),1,threads,nx), nx(nx) {Setup(in,out);} 
-  
+    : fftw(realsize(nx,in,out),1,threads,nx), nx(nx) {Setup(in,out);}
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return Lookup(threadtable,keytype1(nx,threads,inplace));
   }
   void store(bool inplace, const threaddata& data) {
     Store(threadtable,keytype1(nx,data.threads,inplace),data);
   }
-  
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_c2r_1d(nx,(fftw_complex *) in,(double *) out,effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool=false) {
     fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out);
   }
@@ -948,7 +948,7 @@ class crfft1d : public fftw, public Threadtable<keytype1,keyless1> {
 // Complex[M*(n/2+1)]. The arrays in and out may coincide,
 // allocated as Complex[M*(n/2+1)].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   mrcfft1d Forward(n,M,istride,ostride,idist,odist,in,out);
 //   Forward.fft(in,out);
@@ -957,7 +957,7 @@ class crfft1d : public fftw, public Threadtable<keytype1,keyless1> {
 //
 //   mrcfft1d Forward(n,M,istride,ostride,idist,odist);
 //   Forward.fft(out);
-// 
+//
 // Notes:
 //   istride is the spacing between the elements of each real vector;
 //   ostride is the spacing between the elements of each Complex vector;
@@ -974,28 +974,28 @@ class mrcfft1d : public fftwblock<double,fftw_complex>,
            size_t istride, size_t ostride,
            size_t idist, size_t odist,
            double *in=NULL, Complex *out=NULL,
-           unsigned int threads=maxthreads) 
+           unsigned int threads=maxthreads)
     : fftw(std::max((realsize(nx,in,out)-2)*istride+(M-1)*idist+2,
                     2*(nx/2*ostride+(M-1)*odist+1)),-1,threads,nx),
       fftwblock<double,fftw_complex>
     (nx,M,istride,ostride,idist,odist,(Complex *) in,out,threads) {}
-  
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace));
   }
-  
+
   void store(bool inplace, const threaddata& data) {
     Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data);
   }
-  
+
   void Normalize(Complex *out) {
     fftw::Normalize<Complex>(nx/2+1,M,ostride,odist,out);
   }
-  
+
   void fftNormalized(double *in, Complex *out=NULL, bool shift=false) {
     fftw::fftNormalized<double,Complex>(nx/2+1,M,ostride,odist,in,out,false);
   }
-  
+
   void fft0Normalized(double *in, Complex *out=NULL) {
     fftw::fftNormalized<double,Complex>(nx/2+1,M,ostride,odist,in,out,true);
   }
@@ -1006,7 +1006,7 @@ class mrcfft1d : public fftwblock<double,fftw_complex>,
 // spectra, using phase sign +1. Before calling fft(), the array in must be
 // allocated as Complex[M*(n/2+1)] and the array out must be allocated as
 // double[M*n]. The arrays in and out may coincide,
-// allocated as Complex[M*(n/2+1)].  
+// allocated as Complex[M*(n/2+1)].
 //
 // Out-of-place usage (input destroyed):
 //
@@ -1017,7 +1017,7 @@ class mrcfft1d : public fftwblock<double,fftw_complex>,
 //
 //   mcrfft1d Backward(n,M,istride,ostride,idist,odist);
 //   Backward.fft(out);
-// 
+//
 // Notes:
 //   stride is the spacing between the elements of each Complex vector;
 //   dist is the spacing between the first elements of the vectors;
@@ -1030,38 +1030,38 @@ class mcrfft1d : public fftwblock<fftw_complex,double>,
 public:
   mcrfft1d(unsigned int nx, unsigned int M, size_t istride, size_t ostride,
            size_t idist, size_t odist, Complex *in=NULL, double *out=NULL,
-           unsigned int threads=maxthreads) 
+           unsigned int threads=maxthreads)
     : fftw(std::max(2*(nx/2*istride+(M-1)*idist+1),
                     (realsize(nx,in,out)-2)*ostride+(M-1)*odist+2),1,threads,nx),
       fftwblock<fftw_complex,double>
     (nx,M,istride,ostride,idist,odist,in,(Complex *) out,threads) {}
-  
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace));
   }
-  
+
   void store(bool inplace, const threaddata& data) {
     Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data);
   }
-  
+
   void Normalize(double *out) {
     fftw::Normalize<double>(nx,M,ostride,odist,out);
   }
-  
+
   void fftNormalized(Complex *in, double *out=NULL, bool shift=false) {
     fftw::fftNormalized<Complex,double>(nx,M,ostride,odist,in,out,false);
   }
-  
+
   void fft0Normalized(Complex *in, double *out=NULL) {
     fftw::fftNormalized<Complex,double>(nx,M,ostride,odist,in,out,true);
   }
 };
-  
+
 // Compute the complex two-dimensional Fourier transform of nx times ny
 // complex values. Before calling fft(), the arrays in and out (which may
 // coincide) must be allocated as Complex[nx*ny].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   fft2d Forward(nx,ny,-1,in,out);
 //   Forward.fft(in,out);
@@ -1087,32 +1087,32 @@ class fft2d : public fftw, public Threadtable<keytype2,keyless2> {
   unsigned int nx;
   unsigned int ny;
   static Table threadtable;
-public:  
+public:
   fft2d(unsigned int nx, unsigned int ny, int sign, Complex *in=NULL,
-        Complex *out=NULL, unsigned int threads=maxthreads) 
-    : fftw(2*nx*ny,sign,threads), nx(nx), ny(ny) {Setup(in,out);} 
-  
+        Complex *out=NULL, unsigned int threads=maxthreads)
+    : fftw(2*nx*ny,sign,threads), nx(nx), ny(ny) {Setup(in,out);}
+
 #ifdef __Array_h__
   fft2d(int sign, const Array::array2<Complex>& in,
-        const Array::array2<Complex>& out=Array::NULL2, 
-        unsigned int threads=maxthreads) 
+        const Array::array2<Complex>& out=Array::NULL2,
+        unsigned int threads=maxthreads)
     : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()) {
     Setup(in,out);
   }
-#endif  
-  
+#endif
+
   threaddata lookup(bool inplace, unsigned int threads) {
     return this->Lookup(threadtable,keytype2(nx,ny,threads,inplace));
   }
   void store(bool inplace, const threaddata& data) {
     this->Store(threadtable,keytype2(nx,ny,data.threads,inplace),data);
   }
-  
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_2d(nx,ny,(fftw_complex *) in,(fftw_complex *) out,
                             sign,effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool=false) {
     fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out);
   }
@@ -1122,9 +1122,9 @@ class fft2d : public fftw, public Threadtable<keytype2,keyless2> {
 // values, using phase sign -1.
 // Before calling fft(), the array in must be allocated as double[nx*ny] and
 // the array out must be allocated as Complex[nx*(ny/2+1)]. The arrays in
-// and out may coincide, allocated as Complex[nx*(ny/2+1)]. 
+// and out may coincide, allocated as Complex[nx*(ny/2+1)].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   rcfft2d Forward(nx,ny,in,out);
 //   Forward.fft(in,out);       // Origin of Fourier domain at (0,0)
@@ -1136,7 +1136,7 @@ class fft2d : public fftw, public Threadtable<keytype2,keyless2> {
 //   rcfft2d Forward(nx,ny);
 //   Forward.fft(in);           // Origin of Fourier domain at (0,0)
 //   Forward.fft0(in);          // Origin of Fourier domain at (nx/2,0)
-// 
+//
 // Notes:
 //   in contains the nx*ny real values stored as a Complex array;
 //   out contains the upper-half portion (ky >= 0) of the Complex transform.
@@ -1144,22 +1144,22 @@ class fft2d : public fftw, public Threadtable<keytype2,keyless2> {
 class rcfft2d : public fftw {
   unsigned int nx;
   unsigned int ny;
-public:  
+public:
   rcfft2d(unsigned int nx, unsigned int ny, Complex *out=NULL,
-          unsigned int threads=maxthreads) 
-    : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} 
-  
+          unsigned int threads=maxthreads)
+    : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);}
+
   rcfft2d(unsigned int nx, unsigned int ny, double *in, Complex *out=NULL,
-          unsigned int threads=maxthreads) 
+          unsigned int threads=maxthreads)
     : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {
     Setup(in,out);
-  } 
-  
+  }
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_r2c_2d(nx,ny,(double *) in,(fftw_complex *) out,
                                 effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool shift=false) {
     if(shift) {
       if(inplace) Shift(in,nx,ny,threads);
@@ -1167,7 +1167,7 @@ class rcfft2d : public fftw {
     }
     fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out);
   }
-  
+
   // Set Nyquist modes of even shifted transforms to zero.
   void deNyquist(Complex *f) {
     unsigned int nyp=ny/2+1;
@@ -1185,14 +1185,14 @@ class rcfft2d : public fftw {
         f[(i+1)*nyp-1]=0.0;
   }
 };
-  
+
 // Compute the real two-dimensional inverse Fourier transform of the
 // nx*(ny/2+1) Complex values corresponding to the spectral values in the
 // half-plane ky >= 0, using phase sign +1.
 // Before calling fft(), the array in must be allocated as
 // Complex[nx*(ny/2+1)] and the array out must be allocated as
 // double[nx*ny]. The arrays in and out may coincide,
-// allocated as Complex[nx*(ny/2+1)]. 
+// allocated as Complex[nx*(ny/2+1)].
 //
 // Out-of-place usage (input destroyed):
 //
@@ -1205,7 +1205,7 @@ class rcfft2d : public fftw {
 //   crfft2d Backward(nx,ny);
 //   Backward.fft(in);          // Origin of Fourier domain at (0,0)
 //   Backward.fft0(in);         // Origin of Fourier domain at (nx/2,0)
-// 
+//
 // Notes:
 //   in contains the upper-half portion (ky >= 0) of the Complex transform;
 //   out contains the nx*ny real values stored as a Complex array.
@@ -1213,22 +1213,22 @@ class rcfft2d : public fftw {
 class crfft2d : public fftw {
   unsigned int nx;
   unsigned int ny;
-public:  
+public:
   crfft2d(unsigned int nx, unsigned int ny, double *out=NULL,
           unsigned int threads=maxthreads) :
-    fftw(2*nx*(ny/2+1),1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} 
-  
+    fftw(2*nx*(ny/2+1),1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);}
+
   crfft2d(unsigned int nx, unsigned int ny, Complex *in, double *out=NULL,
           unsigned int threads=maxthreads)
     : fftw(nx*realsize(ny,in,out),1,threads,nx*ny), nx(nx), ny(ny) {
     Setup(in,out);
-  } 
-  
+  }
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_c2r_2d(nx,ny,(fftw_complex *) in,(double *) out,
                                 effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool shift=false) {
     fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out);
     if(shift) {
@@ -1236,7 +1236,7 @@ class crfft2d : public fftw {
       else Shift((double *) out,nx,ny,threads);
     }
   }
-  
+
   // Set Nyquist modes of even shifted transforms to zero.
   void deNyquist(Complex *f) {
     unsigned int nyp=ny/2+1;
@@ -1255,11 +1255,11 @@ class crfft2d : public fftw {
   }
 };
 
-// Compute the complex three-dimensional Fourier transform of 
+// Compute the complex three-dimensional Fourier transform of
 // nx times ny times nz complex values. Before calling fft(), the arrays in
 // and out (which may coincide) must be allocated as Complex[nx*ny*nz].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   fft3d Forward(nx,ny,nz,-1,in,out);
 //   Forward.fft(in,out);
@@ -1286,20 +1286,20 @@ class fft3d : public fftw {
   unsigned int nx;
   unsigned int ny;
   unsigned int nz;
-public:  
+public:
   fft3d(unsigned int nx, unsigned int ny, unsigned int nz,
         int sign, Complex *in=NULL, Complex *out=NULL,
-        unsigned int threads=maxthreads) 
-    : fftw(2*nx*ny*nz,sign,threads), nx(nx), ny(ny), nz(nz) {Setup(in,out);} 
-  
+        unsigned int threads=maxthreads)
+    : fftw(2*nx*ny*nz,sign,threads), nx(nx), ny(ny), nz(nz) {Setup(in,out);}
+
 #ifdef __Array_h__
   fft3d(int sign, const Array::array3<Complex>& in,
         const Array::array3<Complex>& out=Array::NULL3,
-        unsigned int threads=maxthreads) 
-    : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()), nz(in.Nz()) 
+        unsigned int threads=maxthreads)
+    : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()), nz(in.Nz())
   {Setup(in,out);}
-#endif  
-  
+#endif
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) in,
                             (fftw_complex *) out, sign, effort);
@@ -1310,9 +1310,9 @@ class fft3d : public fftw {
 // nx times ny times nz real values, using phase sign -1.
 // Before calling fft(), the array in must be allocated as double[nx*ny*nz]
 // and the array out must be allocated as Complex[nx*ny*(nz/2+1)]. The
-// arrays in and out may coincide, allocated as Complex[nx*ny*(nz/2+1)]. 
+// arrays in and out may coincide, allocated as Complex[nx*ny*(nz/2+1)].
 //
-// Out-of-place usage: 
+// Out-of-place usage:
 //
 //   rcfft3d Forward(nx,ny,nz,in,out);
 //   Forward.fft(in,out);       // Origin of Fourier domain at (0,0)
@@ -1323,7 +1323,7 @@ class fft3d : public fftw {
 //   rcfft3d Forward(nx,ny,nz);
 //   Forward.fft(in);           // Origin of Fourier domain at (0,0)
 //   Forward.fft0(in);          // Origin of Fourier domain at (nx/2,ny/2,0)
-// 
+//
 // Notes:
 //   in contains the nx*ny*nz real values stored as a Complex array;
 //   out contains the upper-half portion (kz >= 0) of the Complex transform.
@@ -1332,23 +1332,23 @@ class rcfft3d : public fftw {
   unsigned int nx;
   unsigned int ny;
   unsigned int nz;
-public:  
+public:
   rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *out=NULL,
           unsigned int threads=maxthreads)
     : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) {
     Setup(out);
-  } 
-  
+  }
+
   rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *in,
-          Complex *out=NULL, unsigned int threads=maxthreads) 
+          Complex *out=NULL, unsigned int threads=maxthreads)
     : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz),
-      nx(nx), ny(ny), nz(nz) {Setup(in,out);} 
-  
+      nx(nx), ny(ny), nz(nz) {Setup(in,out);}
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_r2c_3d(nx,ny,nz,(double *) in,(fftw_complex *) out,
                                 effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool shift=false) {
     if(shift) {
       if(inplace) Shift(in,nx,ny,nz,threads);
@@ -1356,7 +1356,7 @@ class rcfft3d : public fftw {
     }
     fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out);
   }
-  
+
   // Set Nyquist modes of even shifted transforms to zero.
   void deNyquist(Complex *f) {
     unsigned int nzp=nz/2+1;
@@ -1368,7 +1368,7 @@ class rcfft3d : public fftw {
       for(unsigned int k=0; k < yz; ++k)
         f[k]=0.0;
     }
-    
+
     if(ny % 2 == 0) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
@@ -1379,7 +1379,7 @@ class rcfft3d : public fftw {
           f[iyz+k]=0.0;
       }
     }
-        
+
     if(nz % 2 == 0)
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
@@ -1389,14 +1389,14 @@ class rcfft3d : public fftw {
           f[i*yz+(j+1)*nzp-1]=0.0;
   }
 };
-  
+
 // Compute the real two-dimensional inverse Fourier transform of the
 // nx*ny*(nz/2+1) Complex values corresponding to the spectral values in the
 // half-plane kz >= 0, using phase sign +1.
 // Before calling fft(), the array in must be allocated as
 // Complex[nx*ny*(nz+1)/2] and the array out must be allocated as
 // double[nx*ny*nz]. The arrays in and out may coincide,
-// allocated as Complex[nx*ny*(nz/2+1)]. 
+// allocated as Complex[nx*ny*(nz/2+1)].
 //
 // Out-of-place usage (input destroyed):
 //
@@ -1409,7 +1409,7 @@ class rcfft3d : public fftw {
 //   crfft3d Backward(nx,ny,nz);
 //   Backward.fft(in);          // Origin of Fourier domain at (0,0)
 //   Backward.fft0(in);         // Origin of Fourier domain at (nx/2,ny/2,0)
-// 
+//
 // Notes:
 //   in contains the upper-half portion (kz >= 0) of the Complex transform;
 //   out contains the nx*ny*nz real values stored as a Complex array.
@@ -1418,22 +1418,22 @@ class crfft3d : public fftw {
   unsigned int nx;
   unsigned int ny;
   unsigned int nz;
-public:  
+public:
   crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *out=NULL,
-          unsigned int threads=maxthreads) 
+          unsigned int threads=maxthreads)
     : fftw(2*nx*ny*(nz/2+1),1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz)
-  {Setup(out);} 
-  
+  {Setup(out);}
+
   crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *in,
-          double *out=NULL, unsigned int threads=maxthreads) 
+          double *out=NULL, unsigned int threads=maxthreads)
     : fftw(nx*ny*(realsize(nz,in,out)),1,threads,nx*ny*nz), nx(nx), ny(ny),
-      nz(nz) {Setup(in,out);} 
-  
+      nz(nz) {Setup(in,out);}
+
   fftw_plan Plan(Complex *in, Complex *out) {
     return fftw_plan_dft_c2r_3d(nx,ny,nz,(fftw_complex *) in,(double *) out,
                                 effort);
   }
-  
+
   void Execute(Complex *in, Complex *out, bool shift=false) {
     fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out);
     if(shift) {
@@ -1441,7 +1441,7 @@ class crfft3d : public fftw {
       else Shift((double *) out,nx,ny,nz,threads);
     }
   }
-  
+
   // Set Nyquist modes of even shifted transforms to zero.
   void deNyquist(Complex *f) {
     unsigned int nzp=nz/2+1;
@@ -1453,7 +1453,7 @@ class crfft3d : public fftw {
       for(unsigned int k=0; k < yz; ++k)
         f[k]=0.0;
     }
-    
+
     if(ny % 2 == 0) {
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
@@ -1464,7 +1464,7 @@ class crfft3d : public fftw {
           f[iyz+k]=0.0;
       }
     }
-        
+
     if(nz % 2 == 0)
 #ifndef FFTWPP_SINGLE_THREAD
 #pragma omp parallel for num_threads(threads)
diff --git a/seconds.h b/seconds.h
index 4f157b8f..0b92e902 100644
--- a/seconds.h
+++ b/seconds.h
@@ -13,48 +13,48 @@
 #else
 #define DELTA_EPOCH_IN_MICROSECS  11644473600000000ULL
 #endif
- 
+
 struct timezone
 {
   int  tz_minuteswest; /* minutes W of Greenwich */
   int  tz_dsttime;     /* type of dst correction */
 };
- 
+
 // Definition of a gettimeofday function
- 
+
 inline int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 // Define a structure to receive the current Windows filetime
   FILETIME ft;
- 
+
 // Initialize the present time to 0 and the timezone to UTC
   unsigned __int64 tmpres = 0;
   static int tzflag = 0;
- 
+
   if (NULL != tv)
     {
       GetSystemTimeAsFileTime(&ft);
- 
-// The GetSystemTimeAsFileTime returns the number of 100 nanosecond 
-// intervals since Jan 1, 1601 in a structure. Copy the high bits to 
+
+// The GetSystemTimeAsFileTime returns the number of 100 nanosecond
+// intervals since Jan 1, 1601 in a structure. Copy the high bits to
 // the 64 bit tmpres, shift it left by 32 then or in the low 32 bits.
       tmpres |= ft.dwHighDateTime;
       tmpres <<= 32;
       tmpres |= ft.dwLowDateTime;
- 
+
 // Convert to microseconds by dividing by 10
       tmpres /= 10;
- 
-// The Unix epoch starts on Jan 1 1970.  Need to subtract the difference 
+
+// The Unix epoch starts on Jan 1 1970.  Need to subtract the difference
 // in seconds from Jan 1 1601.
       tmpres -= DELTA_EPOCH_IN_MICROSECS;
- 
-// Finally change microseconds to seconds and place in the seconds value. 
+
+// Finally change microseconds to seconds and place in the seconds value.
 // The modulus picks up the microseconds.
       tv->tv_sec = (long)(tmpres / 1000000UL);
       tv->tv_usec = (long)(tmpres % 1000000UL);
     }
- 
+
   if (NULL != tz)
     {
       if (!tzflag)
@@ -62,12 +62,12 @@ inline int gettimeofday(struct timeval *tv, struct timezone *tz)
           _tzset();
           tzflag++;
         }
-  
+
 // Adjust for the timezone west of Greenwich
       tz->tz_minuteswest = _timezone / 60;
       tz->tz_dsttime = _daylight;
     }
- 
+
   return 0;
 }
 
diff --git a/statistics.h b/statistics.h
index 56394d64..196c60c6 100644
--- a/statistics.h
+++ b/statistics.h
@@ -9,7 +9,7 @@ class statistics {
   double varL;
   double varH;
 public:
-  statistics() : N(0), A(0.0), varL(0.0), varH(0.0) {} 
+  statistics() : N(0), A(0.0), varL(0.0), varH(0.0) {}
   double count() {return N;}
   double mean() {return A;}
   void add(double t) {
@@ -36,14 +36,14 @@ class statistics {
     return stdev(varH,2.0);
   }
   void output(const char *text, unsigned int m) {
-    std::cout << text << ":\n" 
-              << m << "\t" 
-              << A << "\t" 
-              << stdevL() << "\t" 
+    std::cout << text << ":\n"
+              << m << "\t"
+              << A << "\t"
+              << stdevL() << "\t"
               << stdevH() << std::endl;
   }
 };
-  
+
 }
 
 #endif
diff --git a/tests/cconv.cc b/tests/cconv.cc
index 2a92da5b..9b397bc6 100644
--- a/tests/cconv.cc
+++ b/tests/cconv.cc
@@ -18,7 +18,7 @@ bool Test=false;
 unsigned int A=2; // number of inputs
 unsigned int B=1; // number of outputs
 
-inline void init(Complex **F, unsigned int m, unsigned int A) 
+inline void init(Complex **F, unsigned int m, unsigned int A)
 {
   if(A % 2 == 0) {
     unsigned int M=A/2;
@@ -30,14 +30,14 @@ inline void init(Complex **F, unsigned int m, unsigned int A)
       Complex *gs=F[s+M];
       if(Test) {
         for(unsigned int k=0; k < m; k++) {
-	  fs[k]=factor*iF*pow(E,k*I);
-	  gs[k]=factor*iG*pow(E,k*I);
-	}
+          fs[k]=factor*iF*pow(E,k*I);
+          gs[k]=factor*iG*pow(E,k*I);
+        }
       } else {
         for(unsigned int k=0; k < m; k++) {
-	  fs[k]=ffactor*Complex(k,k+1);
-	  gs[k]=gfactor*Complex(k,2*k+1);
-	}
+          fs[k]=ffactor*Complex(k,k+1);
+          gs[k]=gfactor*Complex(k,2*k+1);
+        }
       }
     }
   } else {
@@ -60,7 +60,7 @@ void multA(Complex **F, unsigned int m,
     case 2: multbinary(F,m,indexsize,index,r,threads); break;
     case 4: multbinary2(F,m,indexsize,index,r,threads); break;
     default:
-      cerr << "A=" << A << " is not yet implemented" << endl; 
+      cerr << "A=" << A << " is not yet implemented" << endl;
       exit(1);
   }
 
@@ -79,26 +79,26 @@ int main(int argc, char* argv[])
   bool Direct=false;
   bool Implicit=true;
   bool Explicit=false;
-  
+
   // Number of iterations.
   unsigned int N0=1000000000;
   unsigned int N=0;
-  
+
   unsigned int m=11; // Problem size
-  
+
   int stats=0; // Type of statistics used in timing test.
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeiptA:B::N:m:n:S:T:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -151,7 +151,7 @@ int main(int argc, char* argv[])
 
   cout << "n=" << n << endl;
   cout << "m=" << m << endl;
-  
+
   if(N == 0) {
     N=N0/n;
     N = max(N, 20);
@@ -164,21 +164,21 @@ int main(int argc, char* argv[])
 
   if(B < 1)
     B=1;
-  
+
   unsigned int np=Explicit ? n : m;
   unsigned int C=max(A,B);
   Complex *f=ComplexAlign(C*np);
-  
+
   Complex **F=new Complex *[C];
   for(unsigned int s=0; s < C; ++s)
     F[s]=f+s*np;
-  
+
   Complex *h0=NULL;
   if(Test || Direct)
     h0=ComplexAlign(m*B);
 
   double *T=new double[N];
-  
+
   if(Implicit) {
     ImplicitConvolution C(m,A,B);
     cout << "threads=" << C.Threads() << endl << endl;
@@ -186,22 +186,22 @@ int main(int argc, char* argv[])
     multiplier *mult=NULL;
     switch(B) {
       case 1:
-	switch(A) {
-	  case 1: mult=multautoconvolution; break;
-	  case 2: mult=multbinary; break;
-	  case 4: mult=multbinary2; break;
-	  case 6: mult=multbinary3; break;
-	  case 8: mult=multbinary4; break;
-	  case 16: mult=multbinary8; break;
-	  default:
-	    cerr << "A=" << A << ", B=" << B << " is not yet implemented"
-		 << endl;
-	    exit(1);
-	}
-	break;
+        switch(A) {
+          case 1: mult=multautoconvolution; break;
+          case 2: mult=multbinary; break;
+          case 4: mult=multbinary2; break;
+          case 6: mult=multbinary3; break;
+          case 8: mult=multbinary4; break;
+          case 16: mult=multbinary8; break;
+          default:
+            cerr << "A=" << A << ", B=" << B << " is not yet implemented"
+                 << endl;
+            exit(1);
+        }
+        break;
       default:
-	  mult=multA;
-	break;
+        mult=multA;
+        break;
     }
     if(!mult) {
       cerr << "A=" << A << ", B=" << B << " is not yet implemented"
@@ -221,24 +221,24 @@ int main(int argc, char* argv[])
 
     if(m < 100) {
       for(unsigned int b=0; b < B; ++b) {
-	for(unsigned int i=0; i < m; i++)
-	  cout << F[b][i] << endl;
-	cout << endl;
+        for(unsigned int i=0; i < m; i++)
+          cout << F[b][i] << endl;
+        cout << endl;
       }
     }
     else {
       cout << f[0] << endl;
     }
-      
+
     if(Test || Direct) {
       for(unsigned int b=0; b < B; ++b) {
-	for(unsigned int i=0; i < m; i++) {
-	  h0[i+b*m]=F[b][i];
-	}
+        for(unsigned int i=0; i < m; i++) {
+          h0[i+b*m]=F[b][i];
+        }
       }
     }
   }
-  
+
   if(Explicit) {
     ExplicitConvolution C(n,m,F[0]);
     for(unsigned int i=0; i < N; ++i) {
@@ -251,16 +251,16 @@ int main(int argc, char* argv[])
     cout << endl;
     timings("Explicit",m,T,N,stats);
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++)
         cout << F[0][i] << endl;
     else cout << F[0][0] << endl;
     cout << endl;
     if(Test || Direct)
       for(unsigned int i=0; i < m; i++)
-	h0[i]=F[0][i];
+        h0[i]=F[0][i];
   }
-  
+
   if(Direct) {
     DirectConvolution C(m);
     if(A % 2 == 0 && A > 2)
@@ -273,13 +273,13 @@ int main(int argc, char* argv[])
     if(A == 1)
       C.autoconvolve(h,F[0]);
     T[0]=seconds();
-    
+
     cout << endl;
     timings("Direct",m,T,1);
 
     if(m < 100)
       for(unsigned int i=0; i < m; i++)
-	cout << h[i] << endl;
+        cout << h[i] << endl;
     else
       cout << h[0] << endl;
 
@@ -287,7 +287,7 @@ int main(int argc, char* argv[])
       double error=0.0;
       double norm=0.0;
       for(unsigned int b=0; b < B; ++b) {
-	double factor=1.0+b;
+        double factor=1.0+b;
         for(unsigned long long k=0; k < m; k++) {
           error += abs2(h0[k+b*m]-factor*h[k]);
           norm += abs2(h[k]);
@@ -296,15 +296,15 @@ int main(int argc, char* argv[])
       if(norm > 0) error=sqrt(error/norm);
       cout << "error=" << error << endl;
       if (error > 1e-12)
-	cerr << "Caution! error=" << error << endl;
+        cerr << "Caution! error=" << error << endl;
     }
-    
+
     if(Test)
       for(unsigned int i=0; i < m; i++)
-	h0[i]=h[i];
+        h0[i]=h[i];
     deleteAlign(h);
   }
-    
+
   if(Test) {
     Complex *h=ComplexAlign(n*B);
     // test accuracy of convolution methods:
@@ -329,11 +329,11 @@ int main(int argc, char* argv[])
       for(unsigned long long k=0; k < m; k++)
         h[k]=k*(0.5*k*(k+1)) - k*(k+1)*(2*k+1)/6.0;
     }
-    
+
     if(!testok) {
       cout << "ERROR: no test case for A="<<A<<endl;
       exit(1);
-    }      
+    }
 
     for(unsigned long long k=0; k < m; k++) {
       error += abs2(h0[k]-h[k]);
diff --git a/tests/cconv2.cc b/tests/cconv2.cc
index 946b213f..4aea9576 100644
--- a/tests/cconv2.cc
+++ b/tests/cconv2.cc
@@ -19,7 +19,7 @@ unsigned int my=4;
 
 bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 
-inline void init(Complex **F, unsigned int nxp, unsigned int nyp, unsigned int A) 
+inline void init(Complex **F, unsigned int nxp, unsigned int nyp, unsigned int A)
 {
   if(A%2 == 0) {
     unsigned int M=A/2;
@@ -43,10 +43,10 @@ inline void init(Complex **F, unsigned int nxp, unsigned int nyp, unsigned int A
     exit(1);
   }
 }
-  
+
 unsigned int outlimit=100;
 
-void add(Complex *f, Complex *F) 
+void add(Complex *f, Complex *F)
 {
   for(unsigned int i=0; i < mx; ++i) {
     unsigned int imy=i*my;
@@ -69,15 +69,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeiptA:B:N:m:x:y:n:T:S:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -140,13 +140,13 @@ int main(int argc, char* argv[])
 
   cout << "nx=" << nx << ", ny=" << ny << endl;
   cout << "mx=" << mx << ", my=" << my << endl;
-  
+
   if(N == 0) {
     N=N0/nx/ny;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   size_t align=sizeof(Complex);
   array2<Complex> h0;
   if(Direct) h0.Allocate(mx,my,align);
@@ -161,7 +161,7 @@ int main(int argc, char* argv[])
     cerr << "B=" << B << " is not yet implemented for A=" << A << endl;
     exit(1);
   }
-  
+
   // Allocate input/ouput memory and set up pointers
   Complex **F=new Complex *[A];
   for(unsigned int a=0; a < A; ++a)
@@ -169,19 +169,19 @@ int main(int argc, char* argv[])
 
   // For easy access of first element
   array2<Complex> f(mx,my,F[0]);
-    
+
   double *T=new double[N];
-    
+
   if(Implicit) {
     multiplier *mult;
-  
+
     switch(A) {
       case 2: mult=multbinary; break;
       case 4: mult=multbinary2; break;
       case 6: mult=multbinary3; break;
       case 8: mult=multbinary4; break;
       case 16: mult=multbinary8; break;
-      default: cout << "Multiplication for A=" << A 
+      default: cout << "Multiplication for A=" << A
                     << " is not yet implemented" << endl; exit(1);
     }
 
@@ -195,7 +195,7 @@ int main(int argc, char* argv[])
 //      C.convolve(F[0],F[1]);
       T[i]=seconds();
     }
-    
+
     timings("Implicit",mx,T,N,stats);
 
     if(Direct) {
@@ -203,7 +203,7 @@ int main(int argc, char* argv[])
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=f[i][j];
     }
-    
+
     if(mx*my < outlimit) {
       for(unsigned int i=0; i < mx; i++) {
         for(unsigned int j=0; j < my; j++)
@@ -214,7 +214,7 @@ int main(int argc, char* argv[])
       cout << f[0][0] << endl;
     }
   }
-  
+
   if(Explicit) {
     if(A != 2) {
       cerr << "Explicit convolutions for A=" << A << " are not yet implemented" << endl;
@@ -237,7 +237,7 @@ int main(int argc, char* argv[])
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=F[0][nyp*i+j];
     }
-  
+
     if(mx*my < outlimit) {
       for(unsigned int i=0; i < mx; i++) {
         for(unsigned int j=0; j < my; j++) {
@@ -257,10 +257,10 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,F[0],F[1]);
     T[0]=seconds();
-  
+
     cout << endl;
     timings("Direct",mx,T,1);
-    
+
     if(mx*my < outlimit)
       for(unsigned int i=0; i < mx; i++) {
         for(unsigned int j=0; j < my; j++)
@@ -283,11 +283,11 @@ int main(int argc, char* argv[])
       if (error > 1e-12) cerr << "Caution! error=" << error << endl;
     }
   }
-  
+
   delete [] T;
   for(unsigned int a=0; a < A; ++a)
     deleteAlign(F[a]);
   delete [] F;
-  
+
   return 0;
 }
diff --git a/tests/cconv3.cc b/tests/cconv3.cc
index 0dd5ec94..8234da1a 100644
--- a/tests/cconv3.cc
+++ b/tests/cconv3.cc
@@ -22,9 +22,9 @@ unsigned int mz=4;
 bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 
 
-inline void init(Complex **F, 
-                 unsigned int nxp, unsigned int nyp, unsigned int nzp, 
-                 unsigned int A) 
+inline void init(Complex **F,
+                 unsigned int nxp, unsigned int nyp, unsigned int nzp,
+                 unsigned int A)
 {
   if(A %2 == 0) {
     unsigned int M=A/2;
@@ -62,20 +62,20 @@ int main(int argc, char* argv[])
 
   unsigned int A=2; // Number of independent inputs
   unsigned int B=1; // Number of independent outputs
-  
+
   unsigned int stats=0; // Type of statistics used in timing test.
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeiptA:B:N:m:x:y:z:n:T:S:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -134,27 +134,27 @@ int main(int argc, char* argv[])
     }
   }
 
-  
+
   if(my == 0) my=mx;
 
   nx=cpadding(mx);
   ny=cpadding(my);
   nz=cpadding(mz);
-  
+
   cout << "nx=" << nx << ", ny=" << ny << ", nz=" << ny << endl;
   cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl;
-  
+
   if(N == 0) {
     N=N0/nx/ny/nz;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   size_t align=sizeof(Complex);
-  
+
   array3<Complex> h0;
   if(Direct) h0.Allocate(mx,my,mz,align);
-  
+
   int nxp=Explicit ? nx : mx;
   int nyp=Explicit ? ny : my;
   int nzp=Explicit ? nz : mz;
@@ -167,7 +167,7 @@ int main(int argc, char* argv[])
     cerr << "B=" << B << " is not yet implemented for A=" << A << endl;
     exit(1);
   }
-  
+
   // Allocate input/ouput memory and set up pointers
   Complex **F=new Complex *[A];
   for(unsigned int a=0; a < A; ++a)
@@ -177,17 +177,17 @@ int main(int argc, char* argv[])
   array3<Complex> f(mx,my,mz,F[0]);
 
   double *T=new double[N];
-  
+
   if(Implicit) {
     multiplier *mult;
-  
+
     switch(A) {
       case 2: mult=multbinary; break;
       case 4: mult=multbinary2; break;
       case 6: mult=multbinary3; break;
       case 8: mult=multbinary4; break;
       case 16: mult=multbinary8; break;
-      default: cout << "mult for A=" << A 
+      default: cout << "mult for A=" << A
                     << " is not yet implemented" << endl; exit(1);
     }
 
@@ -200,23 +200,23 @@ int main(int argc, char* argv[])
 //      C.convolve(F[0],F[1]);
       T[i]=seconds();
     }
-    
+
     cout << endl;
     timings("Implicit",mx,T,N,stats);
-    
+
     if(Direct)
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           for(unsigned int k=0; k < mz; k++)
             h0[i][j][k]=f[i][j][k];
 
     cout << endl;
-    if(mx*my*mz < outlimit) 
+    if(mx*my*mz < outlimit)
       cout << f;
-    else 
+    else
       cout << f[0][0][0] << endl;
   }
-  
+
   if(Explicit) {
     if(A != 2) {
       cerr << "Explicit convolutions for A=" << A
@@ -232,12 +232,12 @@ int main(int argc, char* argv[])
       C.convolve(F[0],F[1]);
       T[i]=seconds();
     }
-    
+
     cout << endl;
     timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats);
 
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           for(unsigned int k=0; k < mz; k++)
             h0[i][j][k]=F[0][nyp*nzp*i+nzp*j+k];
@@ -252,7 +252,7 @@ int main(int argc, char* argv[])
         }
         cout << endl;
       }
-    } else { 
+    } else {
       cout << F[0][0] << endl;
     }
   }
@@ -264,7 +264,7 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,F[0],F[1]);
     T[0]=seconds();
-  
+
     timings("Direct",mx,T,1);
 
     if(mx*my*mz < outlimit) {
@@ -291,12 +291,12 @@ int main(int argc, char* argv[])
       }
       if(norm > 0) error=sqrt(error/norm);
       cout << "error=" << error << endl;
-      if (error > 1e-12) 
+      if (error > 1e-12)
         cerr << "Caution! error=" << error << endl;
     }
 
   }
-  
+
   delete [] T;
   for(unsigned int a=0; a < A; ++a)
     deleteAlign(F[a]);
diff --git a/tests/conv.cc b/tests/conv.cc
index 2ab02e25..f21671fa 100644
--- a/tests/conv.cc
+++ b/tests/conv.cc
@@ -24,7 +24,7 @@ void multA(double **F, unsigned int m,
     case 2: multbinary(F,m,indexsize,index,r,threads); break;
     case 4: multbinary2(F,m,indexsize,index,r,threads); break;
     default:
-      cerr << "A=" << A << " is not yet implemented" << endl; 
+      cerr << "A=" << A << " is not yet implemented" << endl;
       exit(1);
   }
 
@@ -36,19 +36,19 @@ void multA(double **F, unsigned int m,
   }
 }
 
-inline void init(Complex **F, unsigned int m,  unsigned int A) 
+inline void init(Complex **F, unsigned int m,  unsigned int A)
 {
-  if(!compact) 
+  if(!compact)
     for(unsigned int i=0; i < A; ++i)
       F[i][m]=0.0;
-                 
+
   const Complex I(0.0,1.0);
   const double E=exp(1.0);
   const double iF=sqrt(3.0);
   const double iG=sqrt(5.0);
-  
+
   if(A % 2 != 0 && A != 1) {
-    cerr << "A=" << A << " is not yet implemented" << endl; 
+    cerr << "A=" << A << " is not yet implemented" << endl;
     exit(1);
   }
   if(A == 1) {
@@ -82,7 +82,7 @@ inline void init(Complex **F, unsigned int m,  unsigned int A)
 
 void test(unsigned int m, Complex *h0)
 {
-  
+
   Complex *h=ComplexAlign(m);
   double error=0.0;
   cout << endl;
@@ -93,7 +93,7 @@ void test(unsigned int m, Complex *h0)
   const double E=exp(1.0);
   const double F=sqrt(3.0);
   const double G=sqrt(5.0);
-  
+
   for(long long k=0; k < mm; k++) {
     h[k]=F*G*(2*mm-1-k)*pow(E,k*I);
     //      h[k]=F*G*(4*m*m*m-6*(k+1)*m*m+(6*k+2)*m+3*k*k*k-3*k)/6.0;
@@ -115,21 +115,21 @@ int main(int argc, char* argv[])
   unsigned int N=0; // Number of iterations.
   unsigned int N0=10000000; // Nominal number of iterations
   unsigned int m=11; // Problem size
-  
+
   int stats=0; // Type of statistics used in timing test.
 
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeiptA:B:N:m:n:T:S:X:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -185,26 +185,26 @@ int main(int argc, char* argv[])
   }
 
   unsigned int n=hpadding(m);
-  
+
   cout << "n=" << n << endl;
   cout << "m=" << m << endl;
-  
+
   if(N == 0) {
     N=N0/n;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   unsigned int np=Explicit ? n/2+1 : m+!compact;
 
   // explicit and direct convolutions are only implemented for binary
   // convolutions.
-  if(!Implicit) 
+  if(!Implicit)
     A=2;
-  
+
   if(B < 1)
     B=1;
-  
+
   unsigned int C=max(A,B);
   Complex *f=ComplexAlign(C*np);
   Complex **F=new Complex *[C];
@@ -222,10 +222,10 @@ int main(int argc, char* argv[])
     cout << "threads=" << C.Threads() << endl << endl;
 
     if (A % 2 != 0) {
-      cerr << "A=" << A << " is not yet implemented" << endl; 
+      cerr << "A=" << A << " is not yet implemented" << endl;
       exit(1);
     }
-    
+
     realmultiplier *mult=0;
     if(B == 1) {
       switch(A) {
@@ -235,7 +235,7 @@ int main(int argc, char* argv[])
       }
     } else
       mult=multA;
-    
+
     for(unsigned int i=0; i < N; ++i) {
       init(F,m,A);
       seconds();
@@ -246,26 +246,26 @@ int main(int argc, char* argv[])
 
     timings("Implicit",m,T,N,stats);
 
-    
+
     if(m < 100) {
       for(unsigned int b=0; b < B; ++b) {
-	for(unsigned int i=0; i < m; i++)
-	  cout << F[b][i] << endl;
-	cout << endl;
+        for(unsigned int i=0; i < m; i++)
+          cout << F[b][i] << endl;
+        cout << endl;
       }
     } else {
       cout << f[0] << endl;
     }
-    
+
     if(Test || Direct) {
       for(unsigned int b=0; b<B; ++b) {
-	for(unsigned int i=0; i < m; i++) {
-	  h0[i+b*m]=F[b][i];
+        for(unsigned int i=0; i < m; i++) {
+          h0[i+b*m]=F[b][i];
         }
       }
     }
   }
-  
+
   if(Explicit) {
     ExplicitHConvolution C(n,m,f);
     for(unsigned int i=0; i < N; ++i) {
@@ -278,16 +278,16 @@ int main(int argc, char* argv[])
     cout << endl;
     timings("Explicit",m,T,N,stats);
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++)
-	cout << f[i] << endl;
+        cout << f[i] << endl;
     else
       cout << f[0] << endl;
-    if(Test || Direct) 
+    if(Test || Direct)
       for(unsigned int i=0; i < m; i++)
-	h0[i]=f[i];
+        h0[i]=f[i];
   }
-  
+
   if(Direct) {
     DirectHConvolution C(m);
     init(F,m,2);
@@ -295,13 +295,13 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,F[0],F[1]);
     T[0]=seconds();
-    
+
     cout << endl;
     timings("Direct",m,T,1);
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++)
-	cout << h[i] << endl;
+        cout << h[i] << endl;
     else
       cout << h[0] << endl;
 
@@ -310,31 +310,31 @@ int main(int argc, char* argv[])
       cout << endl;
       double norm=0.0;
       for(unsigned int b=0; b < B; ++b) {
-	double factor=1.0+b;
-	for(unsigned long long k=0; k < m; k++) {
-	  error += abs2(h0[k+b*m]-factor*h[k]);
-	  norm += abs2(h[k]);
-	}
+        double factor=1.0+b;
+        for(unsigned long long k=0; k < m; k++) {
+          error += abs2(h0[k+b*m]-factor*h[k]);
+          norm += abs2(h[k]);
+        }
       }
       if(norm > 0)
-	error=sqrt(error/norm);
+        error=sqrt(error/norm);
       cout << "error=" << error << endl;
       if (error > 1e-12)
-	cerr << "Caution! error=" << error << endl;
+        cerr << "Caution! error=" << error << endl;
     }
 
-    if(Test) 
+    if(Test)
       for(unsigned int i=0; i < m; i++)
-	h0[i]=h[i];
+        h0[i]=h[i];
     deleteAlign(h);
   }
 
-  if(Test) 
+  if(Test)
     test(m,h0);
-  
+
   delete [] T;
   deleteAlign(f);
   delete [] F;
-  
+
   return 0;
 }
diff --git a/tests/conv2.cc b/tests/conv2.cc
index 4ebbeac6..65d5ae3b 100644
--- a/tests/conv2.cc
+++ b/tests/conv2.cc
@@ -83,14 +83,14 @@ int main(int argc, char* argv[])
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
 #endif
-  
-#ifdef __GNUC__ 
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeipA:B:N:m:x:y:n:T:S:X:Y:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -155,16 +155,16 @@ int main(int argc, char* argv[])
 
   nx=hpadding(mx);
   ny=hpadding(my);
-  
+
   cout << "nx=" << nx << ", ny=" << ny << endl;
   cout << "mx=" << mx << ", my=" << my << endl;
-  
+
   if(N == 0) {
     N=N0/nx/ny;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-    
+
   size_t align=sizeof(Complex);
 
   array2<Complex> h0;
@@ -175,13 +175,13 @@ int main(int argc, char* argv[])
 
   cout << "nxp=" << nxp << ", nyp=" << nyp << endl;
 
-  
+
   if(B < 1) B=1;
   if(B > A) {
     cerr << "B=" << B << " is not yet implemented for A=" << A << endl;
     exit(1);
   }
-    
+
   Complex **F=new Complex *[A];
   for(unsigned int a=0; a < A; ++a)
     F[a]=ComplexAlign(nxp*nyp);
@@ -201,8 +201,8 @@ int main(int argc, char* argv[])
       case 4: mult=multbinary2; break;
       default: cerr << "A=" << A << " is not yet implemented" << endl; exit(1);
     }
-    
-    
+
+
     for(unsigned int i=0; i < N; ++i) {
       init(F,mx,my,nxp,nyp,A,xcompact,ycompact);
       seconds();
@@ -210,28 +210,28 @@ int main(int argc, char* argv[])
 //      C.convolve(f,g);
       T[i]=seconds();
     }
-    
+
     timings("Implicit",mx,T,N,stats);
 
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=f[i+!xcompact][j];
     }
-    
+
     if(nxp*my < outlimit)
       for(unsigned int i=!xcompact; i < nxp; i++) {
         for(unsigned int j=0; j < my; j++)
           cout << f[i][j] << "\t";
         cout << endl;
       } else cout << f[!xcompact][0] << endl;
-    
+
   }
-  
+
   if(Explicit) {
     unsigned int M=A/2;
     ExplicitHConvolution2 C(nx,ny,mx,my,f,M,Pruned);
-    
+
     for(unsigned int i=0; i < N; ++i) {
       init(F,mx,my,nxp,nyp,A,true,true);
       seconds();
@@ -243,14 +243,14 @@ int main(int argc, char* argv[])
     timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats);
 
     unsigned int offset=nx/2-mx+1;
-    
+
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=f[offset+i][j];
     }
 
-    if(2*(mx-1)*my < outlimit) { 
+    if(2*(mx-1)*my < outlimit) {
       for(unsigned int i=offset; i < offset+2*mx-1; i++) {
         for(unsigned int j=0; j < my; j++)
           cout << f[i][j] << "\t";
@@ -260,7 +260,7 @@ int main(int argc, char* argv[])
       cout << f[offset][0] << endl;
     }
   }
-  
+
   if(Direct) {
     unsigned int nxp=2*mx-1;
     array2<Complex> h(nxp,my,align);
@@ -269,7 +269,7 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,F[0],F[1]);
     T[0]=seconds();
-  
+
     cout << endl;
     timings("Direct",mx,T,1);
 
@@ -295,12 +295,12 @@ int main(int argc, char* argv[])
       if (error > 1e-12) cerr << "Caution! error=" << error << endl;
     }
   }
-  
+
   delete [] T;
   for(unsigned int a=0; a < A; ++a)
     deleteAlign(F[a]);
   delete [] F;
-  
-  
+
+
   return 0;
 }
diff --git a/tests/conv3.cc b/tests/conv3.cc
index a615abf0..b6913b5a 100644
--- a/tests/conv3.cc
+++ b/tests/conv3.cc
@@ -38,7 +38,7 @@ inline void init(Complex **F,
     unsigned int M=A/2;
     unsigned int nx=2*mx-1;
     unsigned int ny=2*my-1;
-    
+
     double factor=1.0/sqrt((double) M);
     for(unsigned int s=0; s < M; ++s) {
       double S=sqrt(1.0+s);
@@ -121,7 +121,7 @@ inline void init(array3<Complex>& f, array3<Complex>& g, unsigned int M=1,
 int main(int argc, char* argv[])
 {
   fftw::maxthreads=get_max_threads();
-  
+
   unsigned int A=2; // Number of independent inputs
   unsigned int B=1; // Number of outputs
 
@@ -129,15 +129,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeipA:B:N:m:x:y:z:n:T:S:X:Y:Z:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -176,7 +176,7 @@ int main(int argc, char* argv[])
         break;
       case 'n':
         N0=atoi(optarg);
-        break;     
+        break;
       case 'T':
         fftw::maxthreads=max(atoi(optarg),1);
         break;
@@ -204,29 +204,29 @@ int main(int argc, char* argv[])
   nx=hpadding(mx);
   ny=hpadding(my);
   nz=hpadding(mz);
-  
+
   cout << "nx=" << nx << ", ny=" << ny << ", nz=" << ny << endl;
   cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl;
-  
+
   if(N == 0) {
     N=N0/nx/ny/nz;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-    
+
   size_t align=sizeof(Complex);
   nxp=2*mx-xcompact;
   nyp=2*my-ycompact;
   nzp=mz+!zcompact;
 
   cout << "nxp=" << nxp << ", nyp" << nyp << ", nzp=" << nzp << endl;
-  
+
   if(B < 1) B=1;
   if(B > A) {
     cerr << "B=" << B << " is not yet implemented for A=" << A << endl;
     exit(1);
   }
-  
+
   Complex **F=new Complex *[A];
   for(unsigned int a=0; a < A; ++a)
     F[a]=ComplexAlign(nxp*nyp*nzp);
@@ -242,7 +242,7 @@ int main(int argc, char* argv[])
   if(Implicit) {
     ImplicitHConvolution3 C(mx,my,mz,xcompact,ycompact,zcompact,A,B);
     cout << "threads=" << C.Threads() << endl << endl;
-    
+
     realmultiplier *mult;
     switch(A) {
       case 2: mult=multbinary; break;
@@ -257,11 +257,11 @@ int main(int argc, char* argv[])
 //      C.convolve(f,g);
       T[i]=seconds();
     }
-    
+
     timings("Implicit",mx,T,N,stats);
 
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           for(unsigned int k=0; k < mz; k++)
             h0[i][j][k]=f[i+!xcompact][j+!ycompact][k];
@@ -277,13 +277,13 @@ int main(int argc, char* argv[])
         cout << endl;
       }
     } else cout << f[!xcompact][!ycompact][0] << endl;
-    
+
   }
-  
+
   if(Direct) {
     unsigned int nxp=2*mx-1;
     unsigned int nyp=2*my-1;
-      
+
     array3<Complex> h(nxp,nyp,mz,align);
     array3<Complex> f(nxp,nyp,mz,align);
     array3<Complex> g(nxp,nyp,mz,align);
@@ -305,7 +305,7 @@ int main(int argc, char* argv[])
         cout << endl;
       }
     else cout << h[0][0][0] << endl;
-    
+
     if(Implicit) { // compare implicit version with direct verion:
       double error=0.0;
       double norm=0.0;
@@ -323,12 +323,12 @@ int main(int argc, char* argv[])
     }
 
   }
-  
+
   delete [] T;
   for(unsigned int a=0; a < A; ++a)
     deleteAlign(F[a]);
   delete [] F;
-  
+
 
   return 0;
 }
diff --git a/tests/direct.cc b/tests/direct.cc
index 673e4066..a768238e 100644
--- a/tests/direct.cc
+++ b/tests/direct.cc
@@ -39,7 +39,7 @@ void DirectHConvolution::convolve(Complex *h, Complex *f, Complex *g)
     for(unsigned int j=1; j < m-i; ++j) sum += conj(f[j])*g[i+j];
     h[i]=sum;
   }
-}       
+}
 
 void DirectConvolution2::convolve(Complex *h, Complex *f, Complex *g)
 {
@@ -55,18 +55,18 @@ void DirectConvolution2::convolve(Complex *h, Complex *f, Complex *g)
       h[i*my+j]=sum;
     }
   }
-}       
+}
 
 void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g,
                                    bool symmetrize)
 {
   unsigned int xorigin=mx-1;
-    
+
   if(symmetrize) {
     HermitianSymmetrizeX(mx,my,mx-1,f);
     HermitianSymmetrizeX(mx,my,mx-1,g);
   }
-    
+
   int xstart=-(int)xorigin;
   int ystart=1-(int) my;
   int xstop=mx;
@@ -83,9 +83,9 @@ void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g,
           if(qx >= xstart && qx < xstop) {
             int qy=ky-py;
             if(qy >= ystart && qy < ystop) {
-              sum += ((py >= 0) ? f[(xorigin+px)*my+py] : 
+              sum += ((py >= 0) ? f[(xorigin+px)*my+py] :
                       conj(f[(xorigin-px)*my-py])) *
-                ((qy >= 0) ? g[(xorigin+qx)*my+qy] : 
+                ((qy >= 0) ? g[(xorigin+qx)*my+qy] :
                  conj(g[(xorigin-qx)*my-qy]));
             }
           }
@@ -93,7 +93,7 @@ void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g,
         h[(xorigin+kx)*my+ky]=sum;
       }
     }
-  }     
+  }
 }
 
 void DirectConvolution3::convolve(Complex *h, Complex *f, Complex *g)
@@ -113,20 +113,20 @@ void DirectConvolution3::convolve(Complex *h, Complex *f, Complex *g)
       }
     }
   }
-}       
+}
 
-void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g, 
+void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g,
                                    bool symmetrize)
 {
   unsigned int xorigin=mx-1;
   unsigned int yorigin=my-1;
   unsigned int ny=2*my-1;
-  
+
   if(symmetrize) {
     HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,f);
     HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,g);
   }
-    
+
   int xstart=-(int) xorigin;
   int ystart=-(int) yorigin;
   int zstart=1-(int) mz;
@@ -149,10 +149,10 @@ void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g,
                 if(qy >= ystart && qy < ystop) {
                   int qz=kz-pz;
                   if(qz >= zstart && qz < zstop) {
-                    sum += ((pz >= 0) ? 
-                            f[((xorigin+px)*ny+yorigin+py)*mz+pz] : 
+                    sum += ((pz >= 0) ?
+                            f[((xorigin+px)*ny+yorigin+py)*mz+pz] :
                             conj(f[((xorigin-px)*ny+yorigin-py)*mz-pz])) *
-                      ((qz >= 0) ? g[((xorigin+qx)*ny+yorigin+qy)*mz+qz] :    
+                      ((qz >= 0) ? g[((xorigin+qx)*ny+yorigin+qy)*mz+qz] :
                        conj(g[((xorigin-qx)*ny+yorigin-qy)*mz-qz]));
                   }
                 }
@@ -163,7 +163,7 @@ void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g,
         h[((xorigin+kx)*ny+yorigin+ky)*mz+kz]=sum;
       }
     }
-  }     
+  }
 }
 
 void DirectHTConvolution::convolve(Complex *h, Complex *e, Complex *f,
@@ -198,7 +198,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f,
     HermitianSymmetrizeX(mx,my,mx-1,f);
     HermitianSymmetrizeX(mx,my,mx-1,g);
   }
-    
+
   unsigned int xorigin=mx-1;
   int xstart=-(int) xorigin;
   int xstop=mx;
@@ -212,7 +212,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f,
       Complex sum=0.0;
       for(int px=xstart; px < xstop; ++px) {
         for(int py=ystart; py < ystop; ++py) {
-          Complex E=(py >= 0) ? e[(xorigin+px)*my+py] : 
+          Complex E=(py >= 0) ? e[(xorigin+px)*my+py] :
             conj(e[(xorigin-px)*my-py]);
           for(int qx=xstart; qx < xstop; ++qx) {
             for(int qy=ystart; qy < ystop; ++qy) {
@@ -221,9 +221,9 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f,
                 int ry=ky-py-qy;
                 if(ry >= ystart && ry < ystop) {
                   sum += E *
-                    ((qy >= 0) ? f[(xorigin+qx)*my+qy] : 
+                    ((qy >= 0) ? f[(xorigin+qx)*my+qy] :
                      conj(f[(xorigin-qx)*my-qy])) *
-                    ((ry >= 0) ? g[(xorigin+rx)*my+ry] : 
+                    ((ry >= 0) ? g[(xorigin+rx)*my+ry] :
                      conj(g[(xorigin-rx)*my-ry]));
                 }
               }
@@ -233,7 +233,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f,
         h[(xorigin+kx)*my+ky]=sum;
       }
     }
-  }     
+  }
 }
 
 }
diff --git a/tests/direct.h b/tests/direct.h
index a68d52b9..1dd9de8a 100644
--- a/tests/direct.h
+++ b/tests/direct.h
@@ -7,9 +7,9 @@ namespace fftwpp {
 class DirectConvolution {
 protected:
   unsigned int m;
-public:  
+public:
   DirectConvolution(unsigned int m) : m(m) {}
-  
+
   void convolve(Complex *h, Complex *f, Complex *g);
   void autoconvolve(Complex *h, Complex *f);
 };
@@ -18,9 +18,9 @@ class DirectConvolution {
 class DirectHConvolution {
 protected:
   unsigned int m;
-public:  
+public:
   DirectHConvolution(unsigned int m) : m(m) {}
-  
+
 // Compute h= f (*) g via direct convolution, where f and g contain the m
 // non-negative Fourier components of real functions (contents
 // preserved). The output of m complex values is returned in the array h,
@@ -30,65 +30,65 @@ class DirectHConvolution {
 
 // Out-of-place direct 2D complex convolution.
 class DirectConvolution2 {
-protected:  
+protected:
   unsigned int mx,my;
 public:
   DirectConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my) {}
-  
+
   void convolve(Complex *h, Complex *f, Complex *g);
 };
 
 // Out-of-place direct 2D Hermitian convolution.
 class DirectHConvolution2 {
-protected:  
+protected:
   unsigned int mx,my;
 public:
   DirectHConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my) {}
-  
+
   void convolve(Complex *h, Complex *f, Complex *g, bool symmetrize=true);
 };
 
 // Out-of-place direct 3D complex convolution.
 class DirectConvolution3 {
-protected:  
+protected:
   unsigned int mx,my,mz;
   unsigned int myz;
 public:
-  DirectConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : 
+  DirectConvolution3(unsigned int mx, unsigned int my, unsigned int mz) :
     mx(mx), my(my), mz(mz), myz(my*mz) {}
-  
+
   void convolve(Complex *h, Complex *f, Complex *g);
 };
 
 // Out-of-place direct 3D Hermitian convolution.
 class DirectHConvolution3 {
-protected:  
+protected:
   unsigned int mx,my,mz;
 public:
-  DirectHConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : 
+  DirectHConvolution3(unsigned int mx, unsigned int my, unsigned int mz) :
     mx(mx), my(my), mz(mz) {}
-  
+
   void convolve(Complex *h, Complex *f, Complex *g, bool symmetrize=true);
 };
 
 // Out-of-place direct 1D Hermitian ternary convolution.
 class DirectHTConvolution {
-protected:  
+protected:
   unsigned int m;
 public:
   DirectHTConvolution(unsigned int m) : m(m) {}
-  
+
   void convolve(Complex *h, Complex *e, Complex *f, Complex *g);
 };
 
 // Out-of-place direct 2D Hermitian ternary convolution.
 class DirectHTConvolution2 {
-protected:  
+protected:
   unsigned int mx,my;
 public:
   DirectHTConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my)
   {}
-  
+
   void convolve(Complex *h, Complex *e, Complex *f, Complex *g,
                 bool symmetrize=true);
 };
diff --git a/tests/explicit.cc b/tests/explicit.cc
index c07b1e14..320f241f 100644
--- a/tests/explicit.cc
+++ b/tests/explicit.cc
@@ -15,22 +15,22 @@ void ExplicitConvolution::backwards(Complex *f)
 {
   Backwards->fft(f);
 }
-  
+
 void ExplicitConvolution::forwards(Complex *f)
 {
   Forwards->fft(f);
 }
-  
+
 void ExplicitConvolution::convolve(Complex *f, Complex *g)
 {
   pad(f);
   backwards(f);
-  
+
   pad(g);
   backwards(g);
-      
+
   double ninv=1.0/n;
-  
+
   Vec Ninv=LOAD(ninv);
   PARALLEL(
     for(unsigned int k=0; k < n; ++k)
@@ -46,12 +46,12 @@ void ExplicitHConvolution::pad(Complex *f)
     for(unsigned int i=m; i <= n2; ++i) f[i]=0.0;
     );
 }
-  
+
 void ExplicitHConvolution::backwards(Complex *f)
 {
   cr->fft(f);
 }
-  
+
 void ExplicitHConvolution::forwards(Complex *f)
 {
   rc->fft(f);
@@ -61,19 +61,19 @@ void ExplicitHConvolution::convolve(Complex *f, Complex *g)
 {
   pad(f);
   backwards(f);
-  
+
   pad(g);
   backwards(g);
-      
+
   double *F=(double *) f;
   double *G=(double *) g;
-    
+
   double ninv=1.0/n;
   PARALLEL(
     for(unsigned int k=0; k < n; ++k)
       F[k] *= G[k]*ninv;
     );
-  
+
   forwards(f);
 }
 
@@ -88,7 +88,7 @@ void ExplicitConvolution2::pad(Complex *f)
         f[j]=0.0;
     }
     );
-    
+
   // zero pad right-hand block
   const unsigned int start=mx*ny;
   const unsigned int stop=nx*ny;
@@ -106,7 +106,7 @@ void ExplicitConvolution2::backwards(Complex *f)
   } else
     Backwards->fft(f);
 }
-  
+
 void ExplicitConvolution2::forwards(Complex *f)
 {
   if(prune) {
@@ -115,15 +115,15 @@ void ExplicitConvolution2::forwards(Complex *f)
   } else
     Forwards->fft(f);
 }
-  
+
 void ExplicitConvolution2::convolve(Complex *f, Complex *g)
 {
   pad(f);
   backwards(f);
-  
+
   pad(g);
   backwards(g);
-      
+
   unsigned int n=nx*ny;
   Vec Ninv=LOAD(1.0/n);
   PARALLEL(
@@ -141,10 +141,10 @@ void ExplicitHConvolution2::pad(Complex *f)
   // zero pad left block
   unsigned int stop=(nx2-mx+1)*nyp;
   PARALLEL(
-    for(unsigned int i=0; i < stop; ++i) 
+    for(unsigned int i=0; i < stop; ++i)
       f[i]=0.0;
     );
-    
+
   // zero pad top-middle block
   unsigned int stop2=stop+2*mx*nyp;
   unsigned int diff=nyp-my;
@@ -154,11 +154,11 @@ void ExplicitHConvolution2::pad(Complex *f)
         f[j]=0.0;
     }
     );
-    
+
   // zero pad right block
   stop=nx*nyp;
   PARALLEL(
-    for(unsigned int i=(nx2+mx)*nyp; i < stop; ++i) 
+    for(unsigned int i=(nx2+mx)*nyp; i < stop; ++i)
       f[i]=0.0;
     );
 }
@@ -219,25 +219,25 @@ void ExplicitHConvolution2::convolve(Complex **F, Complex **G, bool symmetrize)
 {
   unsigned int xorigin=nx/2;
   unsigned int nyp=ny/2+1;
-    
+
   for(unsigned int s=0; s < M; ++s) {
     Complex *f=F[s];
     if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,f);
     pad(f);
     backwards(f,false);
-  
+
     Complex *g=G[s];
     if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,g);
     pad(g);
     backwards(g,false);
   }
-    
+
   double ninv=1.0/(nx*ny);
   unsigned int nyp2=2*nyp;
 
   double *f=(double *) F[0];
   double *g=(double *) G[0];
-  
+
   if(M == 1) {
     PARALLEL(
       for(unsigned int i=0; i < nx; ++i) {
@@ -272,7 +272,7 @@ void ExplicitHConvolution2::convolve(Complex **F, Complex **G, bool symmetrize)
       }
       );
   }
-        
+
   forwards(F[0]);
 }
 
@@ -289,7 +289,7 @@ void ExplicitConvolution3::pad(Complex *f)
       }
     }
     );
-    
+
   unsigned int nyz=ny*nz;
   PARALLEL(
     for(unsigned int i=mx; i < nx; ++i) {
@@ -302,7 +302,7 @@ void ExplicitConvolution3::pad(Complex *f)
       }
     }
     );
-    
+
   PARALLEL(
     for(unsigned int i=0; i < nx; ++i) {
       unsigned int nyzi=nyz*i;
@@ -328,7 +328,7 @@ void ExplicitConvolution3::backwards(Complex *f)
   } else
     Backwards->fft(f);
 }
-  
+
 void ExplicitConvolution3::forwards(Complex *f)
 {
   if(prune) {
@@ -346,10 +346,10 @@ void ExplicitConvolution3::convolve(Complex *f, Complex *g)
 {
   pad(f);
   backwards(f);
-  
+
   pad(g);
   backwards(g);
-    
+
   unsigned int n=nx*ny*nz;
   Vec Ninv=LOAD(1.0/n);
   PARALLEL(
@@ -382,23 +382,23 @@ void ExplicitHTConvolution::convolve(Complex *f, Complex *g, Complex *h)
 {
   pad(f);
   backwards(f);
-  
+
   pad(g);
   backwards(g);
-      
+
   pad(h);
   backwards(h);
-        
+
   double *F=(double *) f;
   double *G=(double *) g;
   double *H=(double *) h;
-    
+
   double ninv=1.0/n;
   PARALLEL(
     for(unsigned int k=0; k < n; ++k)
       F[k] *= G[k]*H[k]*ninv;
     );
-    
+
   forwards(f);
 }
 
@@ -415,7 +415,7 @@ void ExplicitHTConvolution2::pad(Complex *f)
         f[j]=0.0;
     }
     );
-    
+
   PARALLEL(
     for(unsigned int i=nx2+mx; i < nx; ++i) {
       unsigned int nypi=nyp*i;
@@ -424,7 +424,7 @@ void ExplicitHTConvolution2::pad(Complex *f)
         f[j]=0.0;
     }
     );
-    
+
   PARALLEL(
     for(unsigned int i=0; i < nx; ++i) {
       unsigned int nypi=nyp*i;
@@ -464,23 +464,23 @@ void ExplicitHTConvolution2::convolve(Complex *f, Complex *g, Complex *h,
 {
   unsigned int xorigin=nx/2;
   unsigned int nyp=ny/2+1;
-    
+
   if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,f);
   pad(f);
   backwards(f,false);
-  
+
   if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,g);
   pad(g);
   backwards(g,false);
-      
+
   if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,h);
   pad(h);
   backwards(h,false);
-        
+
   double *F=(double *) f;
   double *G=(double *) g;
   double *H=(double *) h;
-    
+
   double ninv=1.0/(nx*ny);
   unsigned int nyp2=2*nyp;
 
@@ -492,7 +492,7 @@ void ExplicitHTConvolution2::convolve(Complex *f, Complex *g, Complex *h,
         F[j] *= G[j]*H[j]*ninv;
     }
     );
-        
+
   forwards(f,false);
 }
 
diff --git a/tests/explicit.h b/tests/explicit.h
index 7fc67c8d..f83b6fbd 100644
--- a/tests/explicit.h
+++ b/tests/explicit.h
@@ -8,8 +8,8 @@ class ExplicitConvolution : public ThreadBase {
 protected:
   unsigned int n,m;
   fft1d *Backwards,*Forwards;
-public:  
-  
+public:
+
   // u is a temporary array of size n.
   ExplicitConvolution(unsigned int n, unsigned int m, Complex *u) :
     n(n), m(m) {
@@ -18,17 +18,17 @@ class ExplicitConvolution : public ThreadBase {
 
     threads=Forwards->Threads();
   }
-  
+
   ~ExplicitConvolution() {
     delete Forwards;
     delete Backwards;
-  }    
-  
+  }
+
   void pad(Complex *f);
   void backwards(Complex *f);
   void forwards(Complex *f);
-  
-  // Compute f (*) g. The distinct input arrays f and g are each of size n 
+
+  // Compute f (*) g. The distinct input arrays f and g are each of size n
   // (contents not preserved). The output is returned in f.
   void convolve(Complex *f, Complex *g);
 };
@@ -49,16 +49,16 @@ class ExplicitHConvolution : public ThreadBase {
 
     threads=cr->Threads();
   }
-  
+
   ~ExplicitHConvolution() {
     delete cr;
     delete rc;
   }
-    
+
   void pad(Complex *f);
   void backwards(Complex *f);
   void forwards(Complex *f);
-  
+
 // Compute f (*) g, where f and g contain the m non-negative Fourier
 // components of real functions. Dealiasing is internally implemented via
 // explicit zero-padding to size n >= 3*m.
@@ -95,7 +95,7 @@ class ExplicitConvolution2 : public ThreadBase {
       threads=Forwards->Threads();
     }
   }
-  
+
   ~ExplicitConvolution2() {
     if(prune) {
       delete xForwards;
@@ -106,8 +106,8 @@ class ExplicitConvolution2 : public ThreadBase {
       delete Forwards;
       delete Backwards;
     }
-  }    
-  
+  }
+
   void pad(Complex *f);
   void backwards(Complex *f);
   void forwards(Complex *f);
@@ -127,11 +127,11 @@ class ExplicitHConvolution2 : public ThreadBase {
   mrcfft1d *yForwards;
   crfft2d *Backwards;
   rcfft2d *Forwards;
-  
+
   unsigned int s;
   Complex *ZetaH,*ZetaL;
-public:  
-  ExplicitHConvolution2(unsigned int nx, unsigned int ny, 
+public:
+  ExplicitHConvolution2(unsigned int nx, unsigned int ny,
                         unsigned int mx, unsigned int my,
                         Complex *f, unsigned int M=1,
                         bool pruned=false) :
@@ -156,13 +156,13 @@ class ExplicitHConvolution2 : public ThreadBase {
         yForwards=new mrcfft1d(ny,nx,1,1,rdist,cdist,(double*) f);
         yBackwards=new mcrfft1d(ny,nx,1,1,cdist,rdist,f);
       }
-      
+
     } else {
       Backwards=new crfft2d(nx,ny,f);
       Forwards=new rcfft2d(nx,ny,f);
     }
   }
-  
+
   ~ExplicitHConvolution2() {
     if(prune) {
       delete xForwards;
@@ -173,13 +173,13 @@ class ExplicitHConvolution2 : public ThreadBase {
       delete Forwards;
       delete Backwards;
     }
-  }    
-  
+  }
+
   void pad(Complex *f);
   void backwards(Complex *f, bool shift=true);
   void forwards(Complex *f);
   void convolve(Complex **F, Complex **G, bool symmetrize=true);
-  
+
   // Constructor for special case M=1:
   void convolve(Complex *f, Complex *g, bool symmetrize=true) {
     convolve(&f,&g,symmetrize);
@@ -217,7 +217,7 @@ class ExplicitConvolution3 : public ThreadBase {
       Forwards=new fft3d(nx,ny,nz,-1,f);
     }
   }
-  
+
   ~ExplicitConvolution3() {
     if(prune) {
       delete xForwards;
@@ -230,8 +230,8 @@ class ExplicitConvolution3 : public ThreadBase {
       delete Forwards;
       delete Backwards;
     }
-  }    
-  
+  }
+
   void pad(Complex *f);
   void backwards(Complex *f);
   void forwards(Complex *f);
@@ -255,16 +255,16 @@ class ExplicitHTConvolution : public ThreadBase  {
 
     threads=cr->Threads();
   }
-  
+
   ~ExplicitHTConvolution() {
     delete cr;
     delete rc;
   }
-    
+
   void pad(Complex *f);
   void backwards(Complex *f);
   void forwards(Complex *f);
-  
+
 // Compute the ternary convolution of f, and g, and h, where f, and g, and h
 // contain the m non-negative Fourier components of real
 // functions. Dealiasing is internally implemented via explicit
@@ -290,8 +290,8 @@ class ExplicitHTConvolution2 : public ThreadBase {
   Complex *ZetaH,*ZetaL;
   unsigned int threads;
 
-public:  
-  ExplicitHTConvolution2(unsigned int nx, unsigned int ny, 
+public:
+  ExplicitHTConvolution2(unsigned int nx, unsigned int ny,
                          unsigned int mx, unsigned int my, Complex *f,
                          bool pruned=false) :
     nx(nx), ny(ny), mx(mx), my(my), prune(pruned) {
@@ -303,7 +303,7 @@ class ExplicitHTConvolution2 : public ThreadBase {
       prune=true;
       s=BuildZeta(2*nx,nx,ZetaH,ZetaL);
     }
-    
+
     if(prune) {
       xBackwards=new mfft1d(nx,1,My,nyp,1,f);
       xForwards=new mfft1d(nx,-1,My,nyp,1,f);
@@ -321,7 +321,7 @@ class ExplicitHTConvolution2 : public ThreadBase {
       threads=Forwards->Threads();
     }
   }
-  
+
   ~ExplicitHTConvolution2() {
     if(prune) {
       delete xForwards;
@@ -332,8 +332,8 @@ class ExplicitHTConvolution2 : public ThreadBase {
       delete Forwards;
       delete Backwards;
     }
-  }    
-  
+  }
+
   void pad(Complex *f);
   void backwards(Complex *f, bool shift=true);
   void forwards(Complex *f, bool shift=true);
diff --git a/tests/fft1.cc b/tests/fft1.cc
index f60b8c25..ac04d85f 100644
--- a/tests/fft1.cc
+++ b/tests/fft1.cc
@@ -20,10 +20,10 @@ int main(int argc, char* argv[])
   fftw::maxthreads=get_max_threads();
   int r=-1;
 
-  
-#ifdef __GNUC__ 
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"N:m:x:r:T:S:h");
     if (c == -1) break;
@@ -57,17 +57,17 @@ int main(int argc, char* argv[])
 
 
   size_t align=sizeof(Complex);
-  
+
   array1<Complex> f(m,align);
-  
+
   array1<Complex> g(m,align);
-  
+
   fft1d Forward(-1,f);
   fft1d Backward(1,f);
 
   fft1d Forward0(-1,f,g);
   fft1d Backward0(1,g,f);
-  
+
   for(unsigned int i=0; i < m; i++) f[i]=i;
 
   double *T=new double[N];
diff --git a/tests/fft1r.cc b/tests/fft1r.cc
index 878092bf..3e6d73b7 100644
--- a/tests/fft1r.cc
+++ b/tests/fft1r.cc
@@ -25,10 +25,10 @@ int main(int argc, char* argv[])
 
   fftw::maxthreads=get_max_threads();
   int r=-1;
- 
-#ifdef __GNUC__ 
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c=getopt(argc,argv,"N:m:x:r:T:S:h");
     if (c == -1) break;
@@ -63,10 +63,10 @@ int main(int argc, char* argv[])
   size_t align=sizeof(Complex);
 
   unsigned int mp=m/2+1;
-  
+
   array1<double> f(m+2,align);
   array1<Complex> g(mp,align);
-  
+
   double *T=new double[N];
 
   if(r == -1 || r == 0) {
diff --git a/tests/fft2.cc b/tests/fft2.cc
index bbac6ca9..edac04e4 100644
--- a/tests/fft2.cc
+++ b/tests/fft2.cc
@@ -16,13 +16,13 @@ unsigned int my=4;
 
 bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 
-inline void init(array2<Complex>& f) 
+inline void init(array2<Complex>& f)
 {
   for(unsigned int i=0; i < mx; ++i)
     for(unsigned int j=0; j < my; j++)
       f[i][j]=Complex(i,j);
 }
-  
+
 unsigned int outlimit=100;
 
 int main(int argc, char* argv[])
@@ -34,15 +34,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hN:m:x:y:n:T:S:r:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -80,13 +80,13 @@ int main(int argc, char* argv[])
   if(my == 0) my=mx;
 
   cout << "mx=" << mx << ", my=" << my << endl;
-  
+
   if(N == 0) {
     N=N0/mx/my;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   size_t align=sizeof(Complex);
 
   array2<Complex> f(mx,my,align);
@@ -97,7 +97,7 @@ int main(int argc, char* argv[])
   if(r == -1 || r == 0) { // conventional FFT, in-place
     fft2d Forward2(-1,f);
     fft2d Backward2(1,f);
-    
+
     for(unsigned int i=0; i < N; ++i) {
       init(f);
       seconds();
@@ -108,12 +108,12 @@ int main(int argc, char* argv[])
     }
     timings("fft2d, in-place",mx,T,N,stats);
   }
-  
+
   if(r == -1 || r == 1) { // conventional FFT, out-of-place
 
     fft2d Forward2(-1,f,g);
     fft2d Backward2(1,f,g);
-    
+
     for(unsigned int i=0; i < N; ++i) {
       init(f);
       seconds();
@@ -129,7 +129,7 @@ int main(int argc, char* argv[])
 
     Transpose Txy(mx,my,1,f(),f(),fftw::maxthreads);
     Transpose Tyx(my,mx,1,f(),f(),fftw::maxthreads);
-  
+
     mfft1d Forwardx(mx,-1,my,1,my);
     mfft1d Backwardx(mx,1,my,1,my);
 
@@ -158,7 +158,7 @@ int main(int argc, char* argv[])
 
     Transpose Txy(mx,my,1,f(),g(),fftw::maxthreads);
     Transpose Tyx(my,mx,1,f(),g(),fftw::maxthreads);
-  
+
     mfft1d Forwardx(mx,-1,my,1,my,f,g);
     mfft1d Backwardx(mx,1,my,1,my,f,g);
 
@@ -182,12 +182,12 @@ int main(int argc, char* argv[])
     }
     timings("transpose and mfft, out-of-place",mx,T,N,stats);
   }
-  
+
   if(r == -1 || r == 4) { // full transpose, in-place
 
     Transpose Txy(mx,my,1,f(),f(),fftw::maxthreads);
     Transpose Tyx(my,mx,1,f(),f(),fftw::maxthreads);
-  
+
     mfft1d Forwardx(mx,-1,my,1,my);
     mfft1d Backwardx(mx,1,my,1,my);
 
@@ -219,7 +219,7 @@ int main(int argc, char* argv[])
 
     Transpose Txy(mx,my,1,f(),g(),fftw::maxthreads);
     Transpose Tyx(my,mx,1,f(),g(),fftw::maxthreads);
-  
+
     mfft1d Forwardx(mx,-1,my,1,my,f,g);
     mfft1d Backwardx(mx,1,my,1,my,f,g);
 
@@ -233,9 +233,9 @@ int main(int argc, char* argv[])
       Forwardy.fft(f,g);
       Txy.transpose(g(),f());
       Forwardx.fft(f,g);
-      
+
       Tyx.transpose(g(),f());
-      
+
       Txy.transpose(f(),g());
 
       Backwardx.fft(g,f);
@@ -247,7 +247,7 @@ int main(int argc, char* argv[])
     }
     timings("2 transposes and mfft, out-of-place",mx,T,N,stats);
   }
-  
+
   if(r == -1 || r == 6)  { // using strides, in-place
 
     mfft1d Forwardx(mx,-1,my,my,1,f,f);
@@ -296,7 +296,7 @@ int main(int argc, char* argv[])
     }
     timings("strided mfft out-of-place",mx,T,N,stats);
   }
-  
+
   cout << endl;
   if(mx*my < outlimit) {
     for(unsigned int i=0; i < mx; i++) {
@@ -306,9 +306,9 @@ int main(int argc, char* argv[])
     }
   }
   else cout << f[0][0] << endl;
-    
+
   delete [] T;
-  
+
   return 0;
 }
 
diff --git a/tests/fft2r.cc b/tests/fft2r.cc
index 9428a573..21e3de5d 100644
--- a/tests/fft2r.cc
+++ b/tests/fft2r.cc
@@ -23,19 +23,19 @@ int main(int argc, char* argv[])
 
   unsigned int nx=4;
   unsigned int ny=5;
-    
+
   int N=1000;
   unsigned int stats=MEAN; // Type of statistics used in timing test.
 
   bool inplace=false;
   bool shift=false;
   bool quiet=false;
-  
+
   fftw::maxthreads=get_max_threads();
- 
-#ifdef __GNUC__ 
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c=getopt(argc,argv,"N:i:m:s:x:y:T:S:hq");
     if (c == -1) break;
@@ -79,15 +79,15 @@ int main(int argc, char* argv[])
   size_t align=sizeof(Complex);
 
   unsigned int nyp=ny/2+1;
-  
+
   array2<Complex> g(nx,nyp,align);
   array2<double> f;
-  
+
   if(inplace)
     f.Dimension(nx,2*nyp,(double *) g());
   else
     f.Allocate(nx,ny,align);
-  
+
   rcfft2d Forward(nx,ny,f,g);
   crfft2d Backward(nx,ny,g,f);
 
@@ -120,7 +120,7 @@ int main(int argc, char* argv[])
       cout << endl;
     }
   }
-  
+
   double *T= new double[N];
 
   for(int i=0; i < N; ++i) {
@@ -141,5 +141,5 @@ int main(int argc, char* argv[])
   }
 
   timings("fft2 out-of-place",nx,T,N,stats);
- 
+
 }
diff --git a/tests/fft3.cc b/tests/fft3.cc
index 9daa57ec..e50f098f 100644
--- a/tests/fft3.cc
+++ b/tests/fft3.cc
@@ -17,14 +17,14 @@ unsigned int mz = 4;
 
 bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 
-inline void init(array3<Complex>& f) 
+inline void init(array3<Complex>& f)
 {
   for(unsigned int i = 0; i < mx; ++i)
     for(unsigned int j = 0; j < my; j++)
-      for(unsigned int k = 0; k < mz; k++) 
+      for(unsigned int k = 0; k < mz; k++)
         f(i, j, k)=Complex(10 * k + i, j);
 }
-  
+
 unsigned int outlimit = 100;
 
 int main(int argc, char* argv[])
@@ -36,15 +36,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hN:m:x:y:z:n:T:S:r:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -86,13 +86,13 @@ int main(int argc, char* argv[])
     my = mx;
 
   cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl;
-  
+
   if(N == 0) {
     N = N0 / mx / my;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   size_t align = sizeof(Complex);
 
   array3<Complex> f(mx, my, mz, align);
@@ -103,7 +103,7 @@ int main(int argc, char* argv[])
   if(r == -1 || r == 0) { // conventional FFT, in-place
     fft3d Forward3(-1, f);
     fft3d Backward3(1, f);
-    
+
     for(unsigned int i = 0; i < N; ++i) {
       init(f);
       seconds();
@@ -114,11 +114,11 @@ int main(int argc, char* argv[])
     }
     timings("fft3d, in-place", mx, T, N, stats);
   }
-  
+
   if(r == -1 || r == 1) { // conventional FFT, out-of-place
     fft3d Forward3(-1, f, g);
     fft3d Backward3(1, f, g);
-    
+
     for(unsigned int i = 0; i < N; ++i) {
       init(f);
       seconds();
@@ -131,7 +131,7 @@ int main(int argc, char* argv[])
   }
 
   delete [] T;
-  
+
   return 0;
 }
 
diff --git a/tests/fft3r.cc b/tests/fft3r.cc
index 8ad65fe3..7e73959a 100644
--- a/tests/fft3r.cc
+++ b/tests/fft3r.cc
@@ -10,9 +10,9 @@ using namespace fftwpp;
 
 void finit(array3<double> f, unsigned int nx, unsigned int ny, unsigned int nz)
 {
-  for(unsigned int i=0; i < nx; ++i) 
+  for(unsigned int i=0; i < nx; ++i)
     for(unsigned int j=0; j < ny; ++j)
-      for(unsigned int k=0; k < nz; ++k) 
+      for(unsigned int k=0; k < nz; ++k)
         f(i,j,k)=i+j+k;
 }
 
@@ -30,12 +30,12 @@ int main(int argc,char* argv[])
   bool inplace=false;
   bool shift=false;
   bool quiet=false;
-  
+
   fftw::maxthreads=get_max_threads();
- 
-#ifdef __GNUC__ 
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c=getopt(argc,argv,"N:i:m:qx:y:z:T:S:h");
     if (c == -1) break;
@@ -80,12 +80,12 @@ int main(int argc,char* argv[])
   }
 
   size_t align=sizeof(Complex);
-  
+
   unsigned int nzp=nz/2+1;
-  
+
   array3<Complex> g(nx,ny,nzp,align);
   array3<double> f;
-  
+
   if(inplace)
     f.Dimension(nx,ny,2*nzp,(double *) g());
   else
@@ -93,7 +93,7 @@ int main(int argc,char* argv[])
 
   rcfft3d Forward(nx,ny,nz,f,g);
   crfft3d Backward(nx,ny,nz,g,f);
-  
+
   if(!quiet) {
     finit(f,nx,ny,nz);
     cout << endl << "Input:" << endl;
@@ -127,7 +127,7 @@ int main(int argc,char* argv[])
   }
 
   double *T=new double[N];
-   
+
   for(int i=0; i < N; ++i) {
     finit(f,nx,ny,nz);
     if(shift) {
@@ -145,5 +145,5 @@ int main(int argc,char* argv[])
     }
   }
   timings("fft3 out-of-place",nx,T,N,stats);
-  
+
 }
diff --git a/tests/mfft1.cc b/tests/mfft1.cc
index dda02a07..1a04c93c 100644
--- a/tests/mfft1.cc
+++ b/tests/mfft1.cc
@@ -13,13 +13,13 @@ unsigned int N0=10000000;
 
 bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 
-inline void init(array2<Complex>& f, unsigned int mx, unsigned int my) 
+inline void init(array2<Complex>& f, unsigned int mx, unsigned int my)
 {
   for(unsigned int i=0; i < mx; ++i)
     for(unsigned int j=0; j < my; j++)
       f[i][j]=Complex(i,j);
 }
-  
+
 unsigned int outlimit=100;
 
 int main(int argc, char* argv[])
@@ -34,15 +34,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hN:m:x:y:n:T:S:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -77,18 +77,18 @@ int main(int argc, char* argv[])
   if(my == 0) my=mx;
 
   cout << "mx=" << mx << ", my=" << my << endl;
-  
+
   if(N == 0) {
     N=N0/mx/my;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   size_t align=sizeof(Complex);
 
   array2<Complex> f(mx,my,align);
   array2<Complex> g(mx,my,align);
-  
+
   mfft1d Forward(my,-1,mx,1,my);
   mfft1d Backward(my,1,mx,1,my);
 
@@ -112,7 +112,7 @@ int main(int argc, char* argv[])
         cout << f[i][j] << "\t";
       cout << endl;
     }
-  } else { 
+  } else {
     cout << f[0][0] << endl;
   }
 
@@ -124,11 +124,11 @@ int main(int argc, char* argv[])
         cout << f[i][j] << "\t";
       cout << endl;
     }
-  } else { 
+  } else {
     cout << f[0][0] << endl;
   }
 
-  
+
   cout << endl;
   double *T=new double[N];
   for(unsigned int i=0; i < N; ++i) {
@@ -141,7 +141,7 @@ int main(int argc, char* argv[])
   }
   timings("mfft1, in-place",mx,T,N,stats);
   delete [] T;
-  
+
   return 0;
 }
 
diff --git a/tests/mfft1r.cc b/tests/mfft1r.cc
index 3bd30dda..61485a34 100644
--- a/tests/mfft1r.cc
+++ b/tests/mfft1r.cc
@@ -9,13 +9,13 @@ using namespace Array;
 using namespace fftwpp;
 
 
-inline void init(array2<double>& f, unsigned int mx, unsigned int my) 
+inline void init(array2<double>& f, unsigned int mx, unsigned int my)
 {
   for(unsigned int i=0; i < mx; ++i)
     for(unsigned int j=0; j < my; j++)
       f[i][j] = 10 * i + j;
 }
-  
+
 unsigned int outlimit=100;
 
 int main(int argc, char* argv[])
@@ -35,11 +35,11 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hN:m:x:y:n:T:S:");
     if (c == -1) break;
@@ -87,7 +87,7 @@ int main(int argc, char* argv[])
     if(N < 10) N=10;
   }
   cout << "N=" << N << endl;
-  
+
   size_t align=sizeof(Complex);
 
   array2<double> f(mx,my,align);
@@ -97,7 +97,7 @@ int main(int argc, char* argv[])
   size_t cstride=1;
   size_t rdist=mx;
   size_t cdist=np;
-  
+
   mrcfft1d Forward(mx, // length of transform
                    M,  // number of transforms
                    rstride,
@@ -150,7 +150,7 @@ int main(int argc, char* argv[])
     }
     timings("mfft1r, in-place",mx,T,N,stats);
     delete [] T;
-  }  
+  }
 
   return 0;
 }
diff --git a/tests/tconv.cc b/tests/tconv.cc
index d93e003a..d019cc13 100644
--- a/tests/tconv.cc
+++ b/tests/tconv.cc
@@ -14,10 +14,10 @@ unsigned int N=0;
 unsigned int m=12;
 unsigned int M=1;
 unsigned int B=1;
-  
+
 bool Direct=false, Implicit=true, Explicit=false;
 
-inline void init(Complex *e, Complex *f, Complex *g, unsigned int M=1) 
+inline void init(Complex *e, Complex *f, Complex *g, unsigned int M=1)
 {
   unsigned int m1=m+1;
   unsigned int Mm=M*m1;
@@ -47,15 +47,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeipA:B:N:m:n:T:S:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -89,7 +89,7 @@ int main(int argc, char* argv[])
         break;
       case 'T':
         fftw::maxthreads=max(atoi(optarg),1);
-        break;      
+        break;
       case 'S':
         stats=atoi(optarg);
         break;
@@ -105,25 +105,25 @@ int main(int argc, char* argv[])
 
   cout << "n=" << n << endl;
   cout << "m=" << m << endl;
-  
+
   if(N == 0) {
     N=N0/n;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-  
+
   Complex *h0=NULL;
   if(Direct && ! Explicit) h0=ComplexAlign(m);
 
   unsigned int m1=m+1;
   unsigned int np=Explicit ? n/2+1 : m1;
   if(Implicit) np *= M;
-    
+
   if(B != 1) {
     cerr << "B=" << B << " is not yet implemented" << endl;
     exit(1);
   }
-    
+
   Complex *e=ComplexAlign(np);
   Complex *f=ComplexAlign(np);
   Complex *g=ComplexAlign(np);
@@ -149,20 +149,20 @@ int main(int argc, char* argv[])
 //      C.convolve(e,f,g);
       T[i]=seconds();
     }
-    
+
     timings("Implicit",m,T,N,stats);
 
     if(Direct) for(unsigned int i=0; i < m; i++) h0[i]=e[i];
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++) cout << e[i] << endl;
     else cout << e[0] << endl;
-    
+
     delete [] G;
     delete [] F;
     delete [] E;
   }
-  
+
   if(Explicit) {
     ExplicitHTConvolution C(n,m,f);
     for(unsigned int i=0; i < N; ++i) {
@@ -171,14 +171,14 @@ int main(int argc, char* argv[])
       C.convolve(e,f,g);
       T[i]=seconds();
     }
-    
+
     timings("Explicit",m,T,N,stats);
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++) cout << e[i] << endl;
     else cout << e[0] << endl;
   }
-  
+
   if(Direct) {
     DirectHTConvolution C(m);
     init(e,f,g);
@@ -186,10 +186,10 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,e,f,g);
     T[0]=seconds();
-    
+
     timings("Direct",m,T,1);
 
-    if(m < 100) 
+    if(m < 100)
       for(unsigned int i=0; i < m; i++) cout << h[i] << endl;
     else cout << h[0] << endl;
 
@@ -212,7 +212,7 @@ int main(int argc, char* argv[])
   deleteAlign(g);
   deleteAlign(f);
   deleteAlign(e);
-  
+
   delete [] T;
 
   return 0;
diff --git a/tests/tconv2.cc b/tests/tconv2.cc
index eb7b24dc..7acfac20 100644
--- a/tests/tconv2.cc
+++ b/tests/tconv2.cc
@@ -27,7 +27,7 @@ bool Direct=false, Implicit=true, Explicit=false, Pruned=false;
 unsigned int outlimit=100;
 
 inline void init(Array2<Complex>& e, Array2<Complex>& f, Array2<Complex>& g,
-                 unsigned int M=1) 
+                 unsigned int M=1)
 {
   unsigned int offset=Explicit ? nx/2-mx+1 : 0;
   unsigned int stop=2*mx-1;
@@ -58,15 +58,15 @@ int main(int argc, char* argv[])
 
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hdeiptA:B:N:m:x:y:n:T:S:");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -124,21 +124,21 @@ int main(int argc, char* argv[])
 
   nx=tpadding(mx);
   ny=tpadding(my);
-  
+
   cout << "nx=" << nx << ", ny=" << ny << endl;
   cout << "mx=" << mx << ", my=" << my << endl;
-  
+
   if(N == 0) {
     N=N0/nx/ny;
     N = max(N, 20);
   }
   cout << "N=" << N << endl;
-    
+
   if(B != 1) {
     cerr << "B=" << B << " is not yet implemented" << endl;
     exit(1);
   }
-    
+
   size_t align=sizeof(Complex);
   array2<Complex> h0;
   if(Direct) h0.Allocate(mx,my,align);
@@ -172,11 +172,11 @@ int main(int argc, char* argv[])
 //      C.convolve(e,f,g);
       T[i]=seconds();
     }
-    
+
     timings("Implicit",mx,T,N,stats);
-    
+
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=e[i][j];
     }
@@ -188,12 +188,12 @@ int main(int argc, char* argv[])
         cout << endl;
       } else cout << e[1][0] << endl;
     cout << endl;
-    
+
     delete [] G;
     delete [] F;
     delete [] E;
   }
-  
+
   if(Explicit) {
     ExplicitHTConvolution2 C(nx,ny,mx,my,f,Pruned);
     for(unsigned int i=0; i < N; ++i) {
@@ -202,24 +202,24 @@ int main(int argc, char* argv[])
       C.convolve(e,f,g);
       T[i]=seconds();
     }
-    
+
     timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats);
 
     if(Direct) {
-      for(unsigned int i=0; i < mx; i++) 
+      for(unsigned int i=0; i < mx; i++)
         for(unsigned int j=0; j < my; j++)
           h0[i][j]=e[i][j];
     }
 
     unsigned int offset=nx/2-mx+1;
-    if(2*(mx-1)*my < outlimit) 
+    if(2*(mx-1)*my < outlimit)
       for(unsigned int i=offset; i < offset+2*mx-1; i++) {
         for(unsigned int j=0; j < my; j++)
           cout << e[i][j] << "\t";
         cout << endl;
       } else cout << e[offset][0] << endl;
   }
-  
+
   if(Direct) {
     Explicit=false;
     unsigned int nxp=2*mx-1;
@@ -232,7 +232,7 @@ int main(int argc, char* argv[])
     seconds();
     C.convolve(h,e,f,g);
     T[0]=seconds();
-  
+
     timings("Direct",mx,T,1);
 
     if(nxp*my < outlimit)
@@ -258,7 +258,7 @@ int main(int argc, char* argv[])
     }
 
   }
-  
+
   delete [] T;
 
   return 0;
diff --git a/tests/timing.h b/tests/timing.h
index 6ba3d278..7843dc5b 100644
--- a/tests/timing.h
+++ b/tests/timing.h
@@ -15,7 +15,7 @@
   seconds();
   T[i]=seconds();
   }
-  for(unsigned int i=0; i < N; ++i) 
+  for(unsigned int i=0; i < N; ++i)
   val += T[i];
   return val/N;
   }
@@ -23,10 +23,10 @@
 
 enum timing_algorithm {WRITETOFILE = -1, MEAN, MIN, MAX, MEDIAN, P90, P80, P50};
 
-inline double mean(double *T, unsigned int N, int algorithm) 
+inline double mean(double *T, unsigned int N, int algorithm)
 {
   switch(algorithm) {
-    case WRITETOFILE: 
+    case WRITETOFILE:
     case MEAN: {
       double sum=0.0;
       for(unsigned int i=0; i < N; ++i)
@@ -61,7 +61,7 @@ inline double mean(double *T, unsigned int N, int algorithm)
       std::sort(T,T+N);
       unsigned int start=(int)ceil(N*0.05);
       unsigned int stop=(int)floor(N*0.95);
-      
+
       double sum=0.0;
       for(unsigned int i=start; i < stop; ++i)
         sum += T[i];
@@ -72,7 +72,7 @@ inline double mean(double *T, unsigned int N, int algorithm)
       std::sort(T,T+N);
       unsigned int start=(int)ceil(N*0.1);
       unsigned int stop=(int)floor(N*0.9);
-      
+
       double sum=0.0;
       for(unsigned int i=start; i < stop; ++i)
         sum += T[i];
@@ -83,7 +83,7 @@ inline double mean(double *T, unsigned int N, int algorithm)
       std::sort(T,T+N);
       unsigned int start=(int)ceil(N*0.25);
       unsigned int stop=(int)floor(N*0.75);
-      
+
       double sum=0.0;
       for(unsigned int i=start; i < stop; ++i)
         sum += T[i];
@@ -91,7 +91,7 @@ inline double mean(double *T, unsigned int N, int algorithm)
       break;
     }
     default:
-      std::cout << "Error: invalid algorithm choice: " 
+      std::cout << "Error: invalid algorithm choice: "
                 << algorithm
                 << std::endl;
       exit(1);
@@ -99,12 +99,12 @@ inline double mean(double *T, unsigned int N, int algorithm)
   return 0;
 }
 
-inline void stdev(double *T, unsigned int N, double mean, 
+inline void stdev(double *T, unsigned int N, double mean,
                   double &sigmaL, double& sigmaH,
-                  int algorithm) 
+                  int algorithm)
 {
   switch(algorithm) {
-    case WRITETOFILE: 
+    case WRITETOFILE:
     case MEAN:  {
       sigmaL=0.0, sigmaH=0.0;
       for(unsigned int i=0; i < N; ++i) {
@@ -114,7 +114,7 @@ inline void stdev(double *T, unsigned int N, double mean,
         if(v > 0)
           sigmaH += v*v;
       }
-      double factor=N > 2 ? 2.0/(N-2.0) : 0.0; 
+      double factor=N > 2 ? 2.0/(N-2.0) : 0.0;
       sigmaL=sqrt(sigmaL*factor);
       sigmaH=sqrt(sigmaH*factor);
     }
@@ -156,7 +156,7 @@ inline void stdev(double *T, unsigned int N, double mean,
     }
       break;
     default:
-      std::cout << "Error: invalid algorithm choice: " 
+      std::cout << "Error: invalid algorithm choice: "
                 << algorithm
                 << std::endl;
       exit(1);
@@ -168,15 +168,15 @@ inline void timings(const char* text, unsigned int m, unsigned int count,
 {
 //  mean -= emptytime(T,N);
 //  if(mean < 0.0) mean=0.0;
-  std::cout << text << ":\n" 
-            << m << "\t" 
-            << mean << "\t" 
-            << sigmaL << "\t" 
-            << sigmaH 
+  std::cout << text << ":\n"
+            << m << "\t"
+            << mean << "\t"
+            << sigmaL << "\t"
+            << sigmaH
             << std::endl;
 }
 
-inline void timings(const char* text, unsigned int m, double *T, 
+inline void timings(const char* text, unsigned int m, double *T,
                     unsigned int N, int algorithm=MEAN)
 {
   double sigmaL=0.0, sigmaH=0.0;
diff --git a/tests/transpose.cc b/tests/transpose.cc
index 1bbd2a52..077816d3 100644
--- a/tests/transpose.cc
+++ b/tests/transpose.cc
@@ -15,7 +15,7 @@ unsigned int mx=4;
 unsigned int my=4;
 unsigned int mz=4;
 
-inline void init(array3<Complex>& f, bool transpose=false) 
+inline void init(array3<Complex>& f, bool transpose=false)
 {
   for(unsigned int i=0; i < mx; ++i) {
     for(unsigned int j=0; j < my; ++j) {
@@ -29,7 +29,7 @@ inline void init(array3<Complex>& f, bool transpose=false)
     }
   }
 }
-  
+
 unsigned int outlimit=100;
 
 int main(int argc, char* argv[])
@@ -39,18 +39,18 @@ int main(int argc, char* argv[])
   int stats=0; // Type of statistics used in timing test.
 
   bool inplace = true;
-  
+
 #ifndef __SSE2__
   fftw::effort |= FFTW_NO_SIMD;
-#endif  
-  
-#ifdef __GNUC__ 
+#endif
+
+#ifdef __GNUC__
   optind=0;
-#endif  
+#endif
   for (;;) {
     int c = getopt(argc,argv,"hN:m:x:y:z:n:T:S:i:d");
     if (c == -1) break;
-                
+
     switch (c) {
       case 0:
         break;
@@ -92,13 +92,13 @@ int main(int argc, char* argv[])
     my=mx;
 
   cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl;
-  
+
   cout << "N=" << N << endl;
 
   cout << "inplace=" << inplace << endl;
-  
+
   cout << "threads=" << fftw::maxthreads << endl;
-  
+
   size_t align=sizeof(Complex);
 
   array3<Complex> f(mx,my,mz,align);
@@ -111,17 +111,17 @@ int main(int argc, char* argv[])
     init(f);
 
     cout << "Input:" << endl;
-    if(mx*my*mz < outlimit) 
+    if(mx*my*mz < outlimit)
       cout << f << endl;
-    else 
+    else
       cout << f[0][0][0] << endl;
 
-    transpose.transpose(f(),g()); 
+    transpose.transpose(f(),g());
 
     cout << "Output:" << endl;
-    if(mx*my*mz < outlimit) 
+    if(mx*my*mz < outlimit)
       cout << g << endl;
-    else 
+    else
       cout << g[0][0][0] << endl;
 
     array3<Complex> f0(my,mx,mz,align);
@@ -130,7 +130,7 @@ int main(int argc, char* argv[])
     for(unsigned int i = 0; i < my; ++i) {
       for(unsigned int j = 0; j < mx; ++j) {
         for(unsigned int k = 0; k < mz; ++k) {
-          errmax = max(errmax,abs(g[i][j][k]-f0[i][j][k])); 
+          errmax = max(errmax,abs(g[i][j][k]-f0[i][j][k]));
         }
       }
     }
@@ -142,7 +142,7 @@ int main(int argc, char* argv[])
 
   } else {
     double *T=new double[N];
-  
+
     for(unsigned int i=0; i < N; ++i) {
       init(f);
       seconds();
@@ -152,11 +152,11 @@ int main(int argc, char* argv[])
 
     timings("transpose",mx,T,N,stats);
     delete [] T;
-  }  
+  }
 
   if(!inplace)
     delete pg;
-  
+
   return 0;
 }
 
diff --git a/tests/utils.h b/tests/utils.h
index 7b4294fb..1503dd95 100644
--- a/tests/utils.h
+++ b/tests/utils.h
@@ -5,10 +5,10 @@
 #include "seconds.h"
 #include "timing.h"
 #include "Complex.h"
- 
+
 #ifdef _WIN32
 #include "getopt.h"
-inline double cbrt(double x) 
+inline double cbrt(double x)
 {
   if(x == 0.0) return 0.0;
   static double third=1.0/3.0;
@@ -25,7 +25,7 @@ inline T max(const T a, const S b)
 {
   return a > (T) b ? a : b;
 }
-  
+
 inline void usageCommon(int n)
 {
   std::cerr << "Options: " << std::endl;
@@ -33,9 +33,9 @@ inline void usageCommon(int n)
   std::cerr << "-T\t\t number of threads" << std::endl;
   std::cerr << "-N\t\t number of iterations" << std::endl;
   std::cerr << "-m\t\t size" << std::endl;
-  std::cerr << "-S<int>\t\t stats used in timing test: " 
+  std::cerr << "-S<int>\t\t stats used in timing test: "
             << "0=mean, 1=min, 2=max, 3=median, "
-            << "4=90th percentile, 5=80th percentile, 6=50th percentile" 
+            << "4=90th percentile, 5=80th percentile, 6=50th percentile"
             << std::endl;
   if(n > 1) {
     std::cerr << "-x\t\t x size" << std::endl;
@@ -43,7 +43,7 @@ inline void usageCommon(int n)
   }
   if(n > 2)
     std::cerr << "-z\t\t z size" << std::endl;
-} 
+}
 
 inline void usageDirect()
 {
@@ -56,20 +56,20 @@ inline void usage(int n)
   usageCommon(n);
   std::cerr << "-A\t\t number of data blocks in input" << std::endl;
   std::cerr << "-B\t\t number of data blocks in output" << std::endl;
-} 
+}
 
 inline void usageInplace(int n)
 {
   usageCommon(n);
   std::cerr << "-i\t\t 0=out-of-place, 1=in-place" << std::endl;
-} 
+}
 
-inline void usageTest() 
+inline void usageTest()
 {
   std::cerr << "-t\t\t accuracy test" << std::endl;
 }
-  
-inline void usageExplicit(unsigned int n) 
+
+inline void usageExplicit(unsigned int n)
 {
   usageDirect();
   std::cerr << "-e\t\t explicitly padded convolution" << std::endl;
@@ -144,7 +144,7 @@ inline unsigned int ceilpow2(unsigned int n)
   n |= n >> 16;
   return ++n;
 }
-  
+
 inline unsigned int padding(unsigned int n)
 {
   std::cout << "min padded buffer=" << n << std::endl;