diff --git a/Array.cc b/Array.cc index 1c703dd5..62d56a1c 100644 --- a/Array.cc +++ b/Array.cc @@ -25,23 +25,23 @@ int main() { array3 A(n,m,p); double sum=0.0; - + // Sequential access: int size=A.Size(); for(int i=0; i < size; i++) A(i)=i; - + // Random access: for(int i=0; i < n; i++) { - + // The following statements are equivalent, but the first one optimizes better. array2 Ai=A[i]; // array2 Ai(m,p); Ai=A[i]; // This does an extra memory copy. - + for(int j=0; j < m; j++) { // array1 Aij=Ai[j]; // For 1D arrays: many compilers optimize array1<>::opt better than array1<>. vector Aij=Ai[j]; - + for(int k=0; k < p; k++) { // The following statements are equivalent, but the first one optimizes better. sum=sum+Aij[k]; @@ -49,43 +49,43 @@ int main() } } } - + cout << sum << endl; - - f(A); - g(A); - + + f(A); + g(A); + vector x; Allocate(x,1); x[0]=1.0; cout << h(x) << endl; cout << endl; - + // Arrays with offsets: - + const int offx=-1; const int offy=-1; - + Array1 B(n,offx); // B(offx)...B(n-1+offx) Array2 C(n,m,offx,offy); Array1 D(5); // Functionally equivalent to array1 D(n); - + B=1.0; C=2.0; D=3.0; for(int i=offx; i < n+offx; i++) cout << B[i] << endl; - + cout << endl; - + for(int i=offx; i < n+offx; i++) { Vector Ci=C[i]; - for(int j=offy; j < m+offy; j++) + for(int j=offy; j < m+offy; j++) cout << Ci[j] << endl; } cout << D << endl; - + return 0; } diff --git a/Array.h b/Array.h index 37997da5..ace4c68e 100644 --- a/Array.h +++ b/Array.h @@ -68,13 +68,13 @@ namespace Array { inline std::ostream& _newl(std::ostream& s) {s << '\n'; return s;} inline void ArrayExit(const char *x); - + #ifndef __ExternalArrayExit inline void ArrayExit(const char *x) { std::cerr << _newl << "ERROR: " << x << "." << std::endl; exit(1); -} +} #endif #ifndef __fftwpp_h__ @@ -106,9 +106,9 @@ inline void newAlign(T *&v, size_t len, size_t align) const char *nomem="Memory limits exceeded"; #ifdef HAVE_POSIX_MEMALIGN int rc=posix_memalign(&mem,align,len*sizeof(T)); -#else +#else int rc=posix_memalign0(&mem,align,len*sizeof(T)); -#endif +#endif if(rc == EINVAL) Array::ArrayExit(invalid); if(rc == ENOMEM) Array::ArrayExit(nomem); v=(T *) mem; @@ -124,7 +124,7 @@ inline void deleteAlign(T *v, size_t len) free(v); #else free0(v); -#endif +#endif } #endif @@ -140,7 +140,7 @@ class array1 { virtual unsigned int Size() const {return size;} void CheckSize() const { if(!test(allocated) && size == 0) - ArrayExit("Operation attempted on unallocated array"); + ArrayExit("Operation attempted on unallocated array"); } void CheckEqual(int a, int b, unsigned int dim, unsigned int m) const { if(a != b) { @@ -152,7 +152,7 @@ class array1 { ArrayExit(s.c_str()); } } - + int test(int flag) const {return state & flag;} void clear(int flag) const {state &= ~flag;} void set(int flag) const {state |= flag;} @@ -187,17 +187,17 @@ class array1 { void CheckActivate(size_t align=0) { __checkActivate(1,align); } - + void Allocate(unsigned int nx0, size_t align=0) { Dimension(nx0); CheckActivate(align); } - + void Reallocate(unsigned int nx0, size_t align=0) { Deallocate(); Allocate(nx0,align); } - + array1() : v(NULL), size(0), state(unallocated) {} array1(const void *) : size(0), state(unallocated) {} array1(unsigned int nx0, size_t align=0) : state(unallocated) { @@ -209,11 +209,11 @@ class array1 { state(A.test(temporary)) {} virtual ~array1() {Deallocate();} - + void Freeze() {state=unallocated;} void Hold() {if(test(allocated)) {state=temporary;}} void Purge() const {if(test(temporary)) {Deallocate(); state=unallocated;}} - + virtual void Check(int i, int n, unsigned int dim, unsigned int m, int o=0) const { if(i < 0 || i >= n) { @@ -231,22 +231,22 @@ class array1 { ArrayExit(s.c_str()); } } - + unsigned int Nx() const {return size;} - + #ifdef NDEBUG typedef T *opt; #else typedef array1 opt; #endif - + T& operator [] (int ix) const {__check(ix,size,1,1); return v[ix];} T& operator () (int ix) const {__check(ix,size,1,1); return v[ix];} T* operator () () const {return v;} operator T* () const {return v;} - + array1 operator + (int i) const {return array1(size-i,v+i);} - + void Load(T a) const { __checkSize(); for(unsigned int i=0; i < size; i++) v[i]=a; @@ -260,25 +260,25 @@ class array1 { void Set(T *a) {v=a; clear(allocated);} T Min() { if(size == 0) - ArrayExit("Cannot take minimum of empty array"); + ArrayExit("Cannot take minimum of empty array"); T min=v[0]; for(unsigned int i=1; i < size; i++) if(v[i] < min) min=v[i]; return min; } T Max() { if(size == 0) - ArrayExit("Cannot take maximum of empty array"); + ArrayExit("Cannot take maximum of empty array"); T max=v[0]; for(unsigned int i=1; i < size; i++) if(v[i] > max) max=v[i]; return max; } - + std::istream& Input (std::istream &s) const { __checkSize(); for(unsigned int i=0; i < size; i++) s >> v[i]; return s; } - + array1& operator = (T a) {Load(a); return *this;} array1& operator = (const T *a) {Load(a); return *this;} array1& operator = (const array1& A) { @@ -290,7 +290,7 @@ class array1 { A.Purge(); return *this; } - + array1& operator += (const array1& A) { __checkSize(); for(unsigned int i=0; i < size; i++) v[i] += A(i); @@ -311,7 +311,7 @@ class array1 { for(unsigned int i=0; i < size; i++) v[i] /= A(i); return *this; } - + array1& operator += (T a) { __checkSize(); for(unsigned int i=0; i < size; i++) v[i] += a; @@ -333,7 +333,7 @@ class array1 { for(unsigned int i=0; i < size; i++) v[i] *= ainv; return *this; } - + double L1() const { __checkSize(); double norm=0.0; @@ -368,7 +368,7 @@ class array1 { } return norm; } -#endif +#endif }; template @@ -379,7 +379,7 @@ void swaparray(T& A, T& B) A.Dimension(B); B.Dimension(C); } - + template void leftshiftarray(T& A, T& B, T& C) { @@ -389,7 +389,7 @@ void leftshiftarray(T& A, T& B, T& C) B.Dimension(C); C.Dimension(D); } - + template void rightshiftarray(T& A, T& B, T& C) { @@ -399,7 +399,7 @@ void rightshiftarray(T& A, T& B, T& C) B.Dimension(A); A.Dimension(D); } - + template std::ostream& operator << (std::ostream& s, const array1& A) { @@ -423,7 +423,7 @@ class array2 : public array1 { unsigned int ny; public: using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0) { nx=nx0; ny=ny0; this->size=nx*ny; @@ -433,19 +433,19 @@ class array2 : public array1 { this->v=v0; this->clear(this->allocated); } - void Dimension(const array1 &A) {ArrayExit("Operation not implemented");} - + void Dimension(const array1 &A) {ArrayExit("Operation not implemented");} + void Allocate(unsigned int nx0, unsigned int ny0, size_t align=0) { Dimension(nx0,ny0); __checkActivate(2,align); } - + array2() : nx(0), ny(0) {} array2(unsigned int nx0, unsigned int ny0, size_t align=0) { Allocate(nx0,ny0,align); } array2(unsigned int nx0, unsigned int ny0, T *v0) {Dimension(nx0,ny0,v0);} - + unsigned int Nx() const {return nx;} unsigned int Ny() const {return ny;} @@ -469,7 +469,7 @@ class array2 : public array1 { return this->v[i]; } T* operator () () const {return this->v;} - + array2& operator = (T a) {this->Load(a); return *this;} array2& operator = (T *a) {this->Load(a); return *this;} array2& operator = (const array2& A) { @@ -479,7 +479,7 @@ class array2 : public array1 { A.Purge(); return *this; } - + array2& operator += (const array2& A) { __checkSize(); for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i); @@ -491,7 +491,7 @@ class array2 : public array1 { return *this; } array2& operator *= (const array2& A); - + array2& operator += (T a) { __checkSize(); unsigned int inc=ny+1; @@ -509,7 +509,7 @@ class array2 : public array1 { for(unsigned int i=0; i < this->size; i++) this->v[i] *= a; return *this; } - + void Identity() { this->Load((T) 0); __checkSize(); @@ -547,7 +547,7 @@ class array3 : public array1 { unsigned int nyz; public: using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0) { nx=nx0; ny=ny0; nz=nz0; nyz=ny*nz; this->size=nx*nyz; @@ -557,13 +557,13 @@ class array3 : public array1 { this->v=v0; this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, size_t align=0) { Dimension(nx0,ny0,nz0); __checkActivate(3,align); } - + array3() : nx(0), ny(0), nz(0), nyz(0) {} array3(unsigned int nx0, unsigned int ny0, unsigned int nz0, size_t align=0) { @@ -572,7 +572,7 @@ class array3 : public array1 { array3(unsigned int nx0, unsigned int ny0, unsigned int nz0, T *v0) { Dimension(nx0,ny0,nz0,v0); } - + unsigned int Nx() const {return nx;} unsigned int Ny() const {return ny;} unsigned int Nz() const {return nz;} @@ -592,7 +592,7 @@ class array3 : public array1 { return this->v[i]; } T* operator () () const {return this->v;} - + array3& operator = (T a) {this->Load(a); return *this;} array3& operator = (T *a) {this->Load(a); return *this;} array3& operator = (const array3& A) { @@ -600,10 +600,10 @@ class array3 : public array1 { __checkEqual(ny,A.Ny(),3,2); __checkEqual(nz,A.Nz(),3,3); this->Load(A()); - A.Purge(); + A.Purge(); return *this; } - + array3& operator += (array3& A) { __checkSize(); for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i); @@ -614,7 +614,7 @@ class array3 : public array1 { for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i); return *this; } - + array3& operator += (T a) { __checkSize(); unsigned int inc=nyz+nz+1; @@ -664,7 +664,7 @@ class array4 : public array1 { unsigned int nyzw; public: using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0) { nx=nx0; ny=ny0; nz=nz0; nw=nw0; nzw=nz*nw; nyzw=ny*nzw; @@ -676,13 +676,13 @@ class array4 : public array1 { this->v=v0; this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, size_t align=0) { Dimension(nx0,ny0,nz0,nw0); __checkActivate(4,align); } - + array4() : nx(0), ny(0), nz(0), nw(0), nyz(0), nzw(0), nyzw(0) {} array4(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, size_t align=0) {Allocate(nx0,ny0,nz0,nw0,align);} @@ -712,7 +712,7 @@ class array4 : public array1 { return this->v[i]; } T* operator () () const {return this->v;} - + array4& operator = (T a) {this->Load(a); return *this;} array4& operator = (T *a) {this->Load(a); return *this;} array4& operator = (const array4& A) { @@ -724,7 +724,7 @@ class array4 : public array1 { A.Purge(); return *this; } - + array4& operator += (array4& A) { __checkSize(); for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i); @@ -735,7 +735,7 @@ class array4 : public array1 { for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i); return *this; } - + array4& operator += (T a) { __checkSize(); unsigned int inc=nyzw+nzw+nw+1; @@ -765,7 +765,7 @@ std::ostream& operator << (std::ostream& s, const array4& A) s << _newl; } s << _newl; - } + } s << std::flush; return s; } @@ -789,7 +789,7 @@ class array5 : public array1 { unsigned int nyzwv; public: using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0) { nx=nx0; ny=ny0; nz=nz0; nw=nw0; nv=nv0; nwv=nw*nv; nzwv=nz*nwv; @@ -802,13 +802,13 @@ class array5 : public array1 { this->v=v0; this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0, size_t align=0) { Dimension(nx0,ny0,nz0,nw0,nv0); __checkActivate(5,align); } - + array5() : nx(0), ny(0), nz(0), nw(0), nv(0), nwv(0), nzwv(0), nyzwv(0) {} array5(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0, size_t align=0) { @@ -842,7 +842,7 @@ class array5 : public array1 { return this->v[i]; } T* operator () () const {return this->v;} - + array5& operator = (T a) {this->Load(a); return *this;} array5& operator = (T *a) {this->Load(a); return *this;} array5& operator = (const array5& A) { @@ -855,7 +855,7 @@ class array5 : public array1 { A.Purge(); return *this; } - + array5& operator += (array5& A) { __checkSize(); for(unsigned int i=0; i < this->size; i++) this->v[i] += A(i); @@ -866,7 +866,7 @@ class array5 : public array1 { for(unsigned int i=0; i < this->size; i++) this->v[i] -= A(i); return *this; } - + array5& operator += (T a) { __checkSize(); unsigned int inc=nyzwv+nzwv+nwv+nv+1; @@ -899,7 +899,7 @@ std::ostream& operator << (std::ostream& s, const array5& A) s << _newl; } s << _newl; - } + } s << std::flush; return s; } @@ -928,7 +928,7 @@ class Array1 : public array1 { voff=this->v-ox; } using array1::Dimension; - + void Dimension(unsigned int nx0, int ox0=0) { this->size=nx0; ox=ox0; @@ -948,7 +948,7 @@ class Array1 : public array1 { __checkActivate(1,align); Offsets(); } - + void Reallocate(unsigned int nx0, int ox0=0, size_t align=0) { this->Deallocate(); Allocate(nx0,ox0,align); @@ -969,16 +969,16 @@ class Array1 : public array1 { typedef T *opt; #else typedef Array1 opt; -#endif - +#endif + T& operator [] (int ix) const {__check(ix,this->size,ox,1,1); return voff[ix];} T& operator () (int i) const {__check(i,this->size,0,1,1); return this->v[i];} T* operator () () const {return this->v;} operator T* () const {return this->v;} - + Array1 operator + (int i) const {return Array1(this->size-i,this->v+i,ox);} void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);} - + Array1& operator = (T a) {this->Load(a); return *this;} Array1& operator = (const T *a) {this->Load(a); return *this;} Array1& operator = (const Array1& A) { @@ -995,7 +995,7 @@ class Array1 : public array1 { A.Purge(); return *this; } - + int Ox() const {return ox;} }; @@ -1010,7 +1010,7 @@ class Array2 : public array2 { voff=vtemp-oy; } using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, int ox0=0, int oy0=0) { this->nx=nx0; this->ny=ny0; this->size=this->nx*this->ny; @@ -1023,7 +1023,7 @@ class Array2 : public array2 { Dimension(nx0,ny0,ox0,oy0); this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, int ox0=0, int oy0=0, size_t align=0) { Dimension(nx0,ny0,ox0,oy0); @@ -1050,7 +1050,7 @@ class Array2 : public array2 { return Array1(this->ny,vtemp+ix*(int) this->ny,oy); } #endif - + T& operator () (int ix, int iy) const { __check(ix,this->nx,ox,2,1); __check(iy,this->ny,oy,2,2); @@ -1062,7 +1062,7 @@ class Array2 : public array2 { } T* operator () () const {return this->v;} void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);} - + Array2& operator = (T a) {this->Load(a); return *this;} Array2& operator = (T *a) {this->Load(a); return *this;} Array2& operator = (const Array2& A) { @@ -1083,10 +1083,10 @@ class Array2 : public array2 { A.Purge(); return *this; } - + int Ox() const {return ox;} int Oy() const {return oy;} - + }; template @@ -1100,7 +1100,7 @@ class Array3 : public array3 { voff=vtemp-oy*(int) this->nz-oz; } using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0, int ox0=0, int oy0=0, int oz0=0) { this->nx=nx0; this->ny=ny0; this->nz=nz0; this->nyz=this->ny*this->nz; @@ -1114,14 +1114,14 @@ class Array3 : public array3 { Dimension(nx0,ny0,nz0,ox0,oy0,oz0); this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, int ox0=0, int oy0=0, int oz0=0, size_t align=0) { Dimension(nx0,ny0,nz0,ox0,oy0,oz0); __checkActivate(3,align); Offsets(); } - + Array3() : ox(0), oy(0), oz(0) {} Array3(unsigned int nx0, unsigned int ny0, unsigned int nz0, int ox0=0, int oy0=0, int oz0=0, size_t align=0) { @@ -1131,7 +1131,7 @@ class Array3 : public array3 { int ox0=0, int oy0=0, int oz0=0) { Dimension(nx0,ny0,nz0,v0,ox0,oy0,oz0); } - + Array2 operator [] (int ix) const { __check(ix,this->nx,ox,3,1); return Array2(this->ny,this->nz,vtemp+ix*(int) this->nyz,oy,oz); @@ -1148,7 +1148,7 @@ class Array3 : public array3 { } T* operator () () const {return this->v;} void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);} - + Array3& operator = (T a) {this->Load(a); return *this;} Array3& operator = (T *a) {this->Load(a); return *this;} Array3& operator = (const Array3& A) { @@ -1159,7 +1159,7 @@ class Array3 : public array3 { __checkEqual(oy,A.Oy(),3,2); __checkEqual(oz,A.Oz(),3,3); this->Load(A()); - A.Purge(); + A.Purge(); return *this; } Array3& operator = (const array3& A) { @@ -1170,10 +1170,10 @@ class Array3 : public array3 { __checkEqual(oy,0,3,2); __checkEqual(oz,0,3,3); this->Load(A()); - A.Purge(); + A.Purge(); return *this; } - + int Ox() const {return ox;} int Oy() const {return oy;} int Oz() const {return oz;} @@ -1191,9 +1191,9 @@ class Array4 : public array4 { voff=vtemp-oy*(int) this->nzw-oz*(int) this->nw-ow; } using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0, - unsigned int nw0, + unsigned int nw0, int ox0=0, int oy0=0, int oz0=0, int ow0=0) { this->nx=nx0; this->ny=ny0; this->nz=nz0; this->nw=nw0; this->nzw=this->nz*this->nw; this->nyzw=this->ny*this->nzw; @@ -1208,15 +1208,15 @@ class Array4 : public array4 { Dimension(nx0,ny0,nz0,nw0,ox0,oy0,oz0,ow0); this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, int ox0=0, int oy0=0, int oz0=0, int ow0=0, size_t align=0) { Dimension(nx0,ny0,nz0,nw0,ox0,oy0,oz0,ow0); - __checkActivate(4,align); + __checkActivate(4,align); Offsets(); } - + Array4() : ox(0), oy(0), oz(0), ow(0) {} Array4(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, @@ -1247,10 +1247,10 @@ class Array4 : public array4 { } T* operator () () const {return this->v;} void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);} - + Array4& operator = (T a) {this->Load(a); return *this;} Array4& operator = (T *a) {this->Load(a); return *this;} - + Array4& operator = (const Array4& A) { __checkEqual(this->nx,A.Nx(),4,1); __checkEqual(this->ny,A.Ny(),4,2); @@ -1281,7 +1281,7 @@ class Array4 : public array4 { A.Purge(); return *this; } - + int Ox() const {return ox;} int Oy() const {return oy;} int Oz() const {return oz;} @@ -1299,7 +1299,7 @@ class Array5 : public array5 { voff=vtemp-oy*(int) this->nzwv-oz*(int) this->nwv-ow*(int) this->nv-ov; } using array1::Dimension; - + void Dimension(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0, int ox0=0, int oy0=0, int oz0=0, int ow0=0, int ov0=0) { @@ -1317,16 +1317,16 @@ class Array5 : public array5 { Dimension(nx0,ny0,nz0,nw0,nv0,ox0,oy0,oz0,ow0,ov0); this->clear(this->allocated); } - + void Allocate(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0, int ox0=0, int oy0=0, int oz0=0, int ow0=0, int ov0=0, size_t align=0) { Dimension(nx0,ny0,nz0,nw0,nv0,ox0,oy0,oz0,ow0,ov0); - __checkActivate(5,align); + __checkActivate(5,align); Offsets(); } - + Array5() : ox(0), oy(0), oz(0), ow(0), ov(0) {} Array5(unsigned int nx0, unsigned int ny0, unsigned int nz0, unsigned int nw0, unsigned int nv0, int ox0=0, int oy0=0, @@ -1359,10 +1359,10 @@ class Array5 : public array5 { } T* operator () () const {return this->v;} void Set(T *a) {this->v=a; Offsets(); this->clear(this->allocated);} - + Array5& operator = (T a) {this->Load(a); return *this;} Array5& operator = (T *a) {this->Load(a); return *this;} - + Array5& operator = (const Array5& A) { __checkEqual(this->nx,A.Nx(),5,1); __checkEqual(this->ny,A.Ny(),5,2); @@ -1540,13 +1540,13 @@ inline void Allocate(T *&A, unsigned int n, size_t align=0) template inline void Allocate(array1& A, unsigned int n, size_t align=0) -{ +{ A.Allocate(n,align); } template inline void Allocate(Array1& A, unsigned int n, size_t align=0) -{ +{ A.Allocate(n,align); } @@ -1559,7 +1559,7 @@ inline void Allocate(T *&A, unsigned int n, int o, size_t align=0) template inline void Allocate(Array1& A, unsigned int n, int o, size_t align=0) -{ +{ A.Allocate(n,o,align); } @@ -1571,13 +1571,13 @@ inline void Deallocate(T *A) template inline void Deallocate(array1& A) -{ +{ A.Deallocate(); } template inline void Deallocate(Array1& A) -{ +{ A.Deallocate(); } @@ -1589,32 +1589,32 @@ inline void Deallocate(T *A, int o) template inline void Deallocate(Array1& A, int) -{ +{ A.Deallocate(); } template inline void Reallocate(T *&A, unsigned int n, size_t align=0) -{ +{ if(A) delete [] A; Allocate(A,n,align); } template inline void Reallocate(array1& A, unsigned int n) -{ +{ A.Reallocate(n); } template inline void Reallocate(Array1& A, unsigned int n) -{ +{ A.Reallocate(n); } template inline void Reallocate(T *&A, unsigned int n, int o, size_t align=0) -{ +{ if(A) delete [] A; Allocate(A,n,align); A -= o; @@ -1622,7 +1622,7 @@ inline void Reallocate(T *&A, unsigned int n, int o, size_t align=0) template inline void Reallocate(Array1& A, unsigned int n, int o, size_t align=0) -{ +{ A.Reallocate(n,o,align); } diff --git a/Complex.h b/Complex.h index 67edc26a..a56f2513 100644 --- a/Complex.h +++ b/Complex.h @@ -1,4 +1,4 @@ -/* +/* Copyright (C) 1988 Free Software Foundation written by Doug Lea (dl@rocky.oswego.edu) @@ -43,65 +43,65 @@ class Complex Complex() {} Complex(double r, double i=0) : re(r), im(i) {} Complex(const Complex& y) : re(y.re), im(y.im) {} - + ~Complex() {} double real() const {return re;} double imag() const {return im;} const Complex& operator = (const Complex& y); - + const Complex& operator += (const Complex& y); const Complex& operator += (double y); const Complex& operator -= (const Complex& y); const Complex& operator -= (double y); const Complex& operator *= (const Complex& y); const Complex& operator *= (double y); - const Complex& operator /= (const Complex& y); - const Complex& operator /= (double y); - + const Complex& operator /= (const Complex& y); + const Complex& operator /= (double y); + void error(char* msg) const; }; // inline members -inline const Complex& Complex::operator = (const Complex& y) +inline const Complex& Complex::operator = (const Complex& y) -{ - re = y.re; im = y.im; return *this; -} +{ + re = y.re; im = y.im; return *this; +} inline const Complex& Complex::operator += (const Complex& y) -{ - re += y.re; im += y.im; return *this; +{ + re += y.re; im += y.im; return *this; } inline const Complex& Complex::operator += (double y) -{ - re += y; return *this; +{ + re += y; return *this; } inline const Complex& Complex::operator -= (const Complex& y) -{ - re -= y.re; im -= y.im; return *this; +{ + re -= y.re; im -= y.im; return *this; } inline const Complex& Complex::operator -= (double y) -{ - re -= y; return *this; +{ + re -= y; return *this; } inline const Complex& Complex::operator *= (const Complex& y) -{ +{ double r = re * y.re - im * y.im; - im = re * y.im + im * y.re; - re = r; - return *this; + im = re * y.im + im * y.re; + re = r; + return *this; } inline const Complex& Complex::operator *= (double y) -{ - re *= y; im *= y; return *this; +{ + re *= y; im *= y; return *this; } inline const Complex& Complex::operator /= (const Complex& y) @@ -313,9 +313,9 @@ inline bool isfinite(const Complex& z) { #ifdef _WIN32 return _finite(z.re) && _finite(z.im); -#else +#else return !(std::isinf(z.re) || std::isnan(z.re) || std::isinf(z.im) || std::isnan(z.im)); -#endif +#endif } #endif diff --git a/align.h b/align.h index f6f2e94f..fe891deb 100644 --- a/align.h +++ b/align.h @@ -18,8 +18,8 @@ #ifdef __Array_h__ namespace Array { -static const array1 NULL1; -static const array2 NULL2; +static const array1 NULL1; +static const array2 NULL2; static const array3 NULL3; } @@ -64,9 +64,9 @@ inline void newAlign(T *&v, size_t len, size_t align) const char *nomem="Memory limits exceeded"; #ifdef HAVE_POSIX_MEMALIGN int rc=posix_memalign(&mem,align,len*sizeof(T)); -#else +#else int rc=posix_memalign0(&mem,align,len*sizeof(T)); -#endif +#endif if(rc == EINVAL) std::cerr << invalid << std::endl; if(rc == ENOMEM) std::cerr << nomem << std::endl; v=(T *) mem; @@ -81,7 +81,7 @@ inline void deleteAlign(T *v, size_t len) free(v); #else free0(v); -#endif +#endif } } @@ -115,7 +115,7 @@ inline void deleteAlign(T *p) free(p); #else Array::free0(p); -#endif +#endif } } diff --git a/cmult-sse2.h b/cmult-sse2.h index 80595b1c..66858a62 100644 --- a/cmult-sse2.h +++ b/cmult-sse2.h @@ -32,42 +32,42 @@ union uvec { unsigned u[4]; Vec v; }; - + extern const union uvec sse2_pm; extern const union uvec sse2_mm; #if defined(__INTEL_COMPILER) || !defined(__GNUC__) -static inline Vec operator -(const Vec& a) +static inline Vec operator -(const Vec& a) { return _mm_xor_pd(sse2_mm.v,a); } -static inline Vec operator +(const Vec& a, const Vec& b) +static inline Vec operator +(const Vec& a, const Vec& b) { return _mm_add_pd(a,b); } -static inline Vec operator -(const Vec& a, const Vec& b) +static inline Vec operator -(const Vec& a, const Vec& b) { return _mm_sub_pd(a,b); } -static inline Vec operator *(const Vec& a, const Vec& b) +static inline Vec operator *(const Vec& a, const Vec& b) { return _mm_mul_pd(a,b); } -static inline void operator +=(Vec& a, const Vec& b) +static inline void operator +=(Vec& a, const Vec& b) { a=_mm_add_pd(a,b); } -static inline void operator -=(Vec& a, const Vec& b) +static inline void operator -=(Vec& a, const Vec& b) { a=_mm_sub_pd(a,b); } -static inline void operator *=(Vec& a, const Vec& b) +static inline void operator *=(Vec& a, const Vec& b) { a=_mm_mul_pd(a,b); } @@ -113,32 +113,32 @@ class Vec { public: double x; double y; - + Vec() {}; Vec(double x, double y) : x(x), y(y) {}; Vec(const Vec &v) : x(v.x), y(v.y) {}; Vec(const Complex &z) : x(z.re), y(z.im) {}; - + const Vec& operator += (const Vec& v) { - x += v.x; - y += v.y; + x += v.x; + y += v.y; return *this; } - + const Vec& operator -= (const Vec& v) { - x -= v.x; - y -= v.y; + x -= v.x; + y -= v.y; return *this; } - + const Vec& operator *= (const Vec& v) { - x *= v.x; - y *= v.y; + x *= v.x; + y *= v.y; return *this; } }; -static inline Vec operator -(const Vec& a) +static inline Vec operator -(const Vec& a) { return Vec(-a.x,-a.y); } diff --git a/convolution.cc b/convolution.cc index 89ab3018..a4e99ca9 100644 --- a/convolution.cc +++ b/convolution.cc @@ -28,10 +28,10 @@ unsigned int BuildZeta(double arg, unsigned int m, unsigned int t=m/s; if(s*t < m) ++t; ZetaH=ComplexAlign(t); - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int a=0; a < t; ++a) { double theta=s*a*arg; ZetaH[a]=Complex(cos(theta),sin(theta)); @@ -39,7 +39,7 @@ unsigned int BuildZeta(double arg, unsigned int m, ZetaL=ComplexAlign(s); #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int b=0; b < s; ++b) { double theta=b*arg; ZetaL[b]=Complex(cos(theta),sin(theta)); @@ -55,19 +55,19 @@ unsigned int BuildZeta(unsigned int n, unsigned int m, void ImplicitConvolution::convolve(Complex **F, multiplier *pmult, unsigned int i, unsigned int offset) -{ +{ if(indexsize >= 1) index[indexsize-1]=i; - + unsigned int C=max(A,B); Complex *P[C]; for(unsigned int a=0; a < C; ++a) P[a]=F[a]+offset; - + // Backwards FFT (even indices): for(unsigned int a=0; a < A; ++a) { BackwardsO->fft(P[a],U[a]); } - + if(A >= B) (*pmult)(U,m,indexsize,index,0,threads); // multiply even indices @@ -82,14 +82,14 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult, if(A > B) { // U[A-1] is free Complex *W[A]; W[A-1]=U[A-1]; - for(unsigned int a=1; a < A; ++a) + for(unsigned int a=1; a < A; ++a) W[a-1]=P[a]; for(unsigned int a=A; a-- > 0;) // Loop from A-1 to 0. BackwardsO->fft(P[a],W[a]); - + (*pmult)(W,m,indexsize,index,1,threads); // multiply odd indices - + // Return to original space Complex *lastW=W[A-1]; for(unsigned int b=0; b < B; ++b) { @@ -98,24 +98,24 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult, ForwardsO->fft(U[b],lastW); posttransform(Pb,lastW); } - + } else if(A < B) { // U[B-1] is free Complex *W[B]; W[B-1]=U[B-1]; - for(unsigned int b=1; b < B; ++b) + for(unsigned int b=1; b < B; ++b) W[b-1]=P[b]; for(unsigned int a=A; a-- > 0;) // Loop from A-1 to 0. BackwardsO->fft(P[a],W[a]); - + (*pmult)(W,m,indexsize,index,1,threads); // multiply odd indices - + // Return to original space for(unsigned int b=0; b < B; ++b) ForwardsO->fft(W[b],P[b]); - + (*pmult)(U,m,indexsize,index,0,threads); // multiply even indices - + Complex *f0=P[0]; Complex *u0=U[0]; Forwards->fft(u0); @@ -127,7 +127,7 @@ void ImplicitConvolution::convolve(Complex **F, multiplier *pmult, ForwardsO->fft(ub,u0); posttransform(fb,u0); } - + } else { // A == B // Backwards FFT (odd indices): for(unsigned int a=0; a < A; ++a) @@ -218,7 +218,7 @@ pretransform(Complex **F, unsigned int k, Vec& Zetak) // multiply by root of unity to prepare for inverse FFT for odd modes template void ImplicitConvolution::pretransform(Complex **F) -{ +{ PARALLEL( for(unsigned int K=0; K < m; K += s) { Complex *ZetaL0=ZetaL-K; @@ -259,7 +259,7 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U) { Vec Mhalf=LOAD(-0.5); Vec HSqrt3=LOAD(hsqrt3); - + double Re=0.0, Im=0.0; unsigned int m1=m-1; @@ -279,12 +279,12 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U) Vec A=ZMULTC(zeta1,UNPACKL(B,Fb)); // Optimize? B=ZMULTIC(zeta1,UNPACKH(B,Fb)); STORE(f1c,CONJ(A+B)); - + double re=F[c].re; Re=2.0*re; Im=re+sqrt3*F[c].im; } - + unsigned int c1=c+1; unsigned int d=c1/2; unsigned int a=c1/s; @@ -306,21 +306,21 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U) for(unsigned int k=max(1,K); k < stop; ++k) { Vec zetak=ZMULT(X,Y,LOAD(ZetaL0+k)); Vec Zetak=ZMULTC(zetak,Zetac1); - + Vec Fa=LOAD(F+k); Vec FA=LOAD(fpc1-k); Vec FB=LOAD(fmc1+k); Vec Fb=LOAD(fm-k); - + Vec b=Fb*Mhalf+CONJ(Fa); STORE(F+k,Fa+CONJ(Fb)); Fb *= HSqrt3; Vec a=ZMULTC(zetak,UNPACKL(b,Fb)); b=ZMULTIC(zetak,UNPACKH(b,Fb)); - + STORE(fmc1+k,CONJ(a+b)); STORE(U+k,a-b); - + b=FB*Mhalf+CONJ(FA); STORE(fpc1-k,FA+CONJ(FB)); FB *= HSqrt3; @@ -332,7 +332,7 @@ void ImplicitHConvolution::pretransform(Complex *F, Complex *f1c, Complex *U) } } ); - + if(even) { F[c]=Re; U[c]=Im; @@ -348,7 +348,7 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w, Vec Mhalf=LOAD(-0.5); Vec HSqrt3=LOAD(hsqrt3); - unsigned int m1=m-1; + unsigned int m1=m-1; unsigned int c1=c+1; unsigned int d=c1/2; unsigned int a=c1/s; @@ -377,7 +377,7 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w, F2=ZMULT(Zeta1,LOAD(U+c)); STORE(f0+c,F0+F1+F2); } - + unsigned int D=c-d; PARALLEL( for(unsigned int K=0; K <= D; K += s) { @@ -393,27 +393,27 @@ void ImplicitHConvolution::posttransform(Complex *F, const Complex& w, for(unsigned int k=max(even+1,K); k < stop; ++k) { Vec zetak=ZMULT(X,Y,LOAD(ZetaL0+k)); Vec Zetak=ZMULTC(zetak,Zetac1); - + Vec F0=LOAD(F+k)*Ninv; Vec F1=ZMULTC(zetak,LOAD(fmc1+k)); Vec F2=ZMULT(zetak,LOAD(U+k)); Vec S=F1+F2; F2=CONJ(F0+Mhalf*S)-HSqrt3*FLIP(F1-F2); - + Vec FA=LOAD(fpc1-k)*Ninv; Vec FB=ZMULTC(Zetak,LOAD(fm-k)); Vec FC=ZMULT(Zetak,LOAD(upc1-k)); Vec T=FB+FC; - + STORE(F+k,F0+S); STORE(fpc1-k,FA+T); STORE(fmc1+k,CONJ(FA+Mhalf*T)-HSqrt3*FLIP(FB-FC)); STORE(fm-k,F2); - } + } } ); - + if(d == D+1) { unsigned int a=d/s; Vec Zeta=Ninv*LOAD(ZetaH+a); @@ -433,7 +433,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, unsigned int i, unsigned int offset) { if(indexsize >= 1) index[indexsize-1]=i; - + // Set problem-size variables and pointers: unsigned int C=max(A,B); @@ -448,8 +448,8 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, c0[a]=f; c1[a]=f+start; } - - if(A != B) { + + if(A != B) { for(unsigned int a=0; a < C-1; ++a) { d0[a]=(double *) c0[a+1]; d1[a]=(double *) c1[a+1]; @@ -477,7 +477,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, } // Complex-to-real FFTs and pmults: - + double Re[B],Im[B]; // r=-1 (backwards): @@ -506,7 +506,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, crO->fft(c0a,d0[a]); } (*pmult)(d0,m,indexsize,index,0,threads); - + for(unsigned int b=0; b < B; ++b) { Complex *c0b=c0[b]; rcO->fft(d0[b],c0b); @@ -515,7 +515,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, Re[b]=z.re; Im[b]=z.im; } - + if(even) { for(unsigned int a=C; a-- > 0;) { // Loop from C-1 to 0. Complex *c1a=c1[a]; @@ -524,7 +524,7 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, c1a[1]=tmp; // r=1, k=1 } } - + // r=1: for(unsigned int a=A; a-- > 0;) { // Loop from A-1 to 0. Complex *c1a=c1[a]; @@ -542,9 +542,9 @@ void ImplicitHConvolution::convolve(Complex **F, realmultiplier *pmult, c1b[1]=tmp; // r=0, k=c } } - + const double ninv=1.0/(3.0*m); - + // r=-1 (forwards): if(A > B) { for(unsigned int b=0; b < B; ++b) { @@ -594,7 +594,7 @@ void fftpad::expand(Complex *f, Complex *u) } ); } - + void fftpad::backwards(Complex *f, Complex *u) { expand(f,u); @@ -637,11 +637,11 @@ void fft0pad::expand(Complex *f, Complex *u) Complex *fm1stride=f+(m-1)*stride; for(unsigned int i=0; i < M; ++i) u[i]=fm1stride[i]; - + Vec Mhalf=LOAD(-0.5); Vec Mhsqrt3=LOAD(-hsqrt3); unsigned int stop=s; - + for(unsigned int K=0; K < m; K += s) { Complex *ZetaL0=ZetaL-K; Vec H=LOAD(ZetaH+K/s); @@ -700,7 +700,7 @@ void fft0pad::reduce(Complex *f, Complex *u) Vec Mhalf=LOAD(-0.5); Vec HSqrt3=LOAD(hsqrt3); Complex *fm1stride=f+(m-1)*stride; - + unsigned int stop=s; for(unsigned int K=0; K < m; K += s) { Complex *ZetaL0=ZetaL-K; @@ -724,7 +724,7 @@ void fft0pad::reduce(Complex *f, Complex *u) } stop=min(stop+s,m); } - + for(unsigned int i=0; i < M; ++i) fm1stride[i]=umstride[i]; } @@ -733,7 +733,7 @@ void fft0pad::Forwards0(Complex *f) { Forwards->fft(f+(m-1)*stride); } - + void fft0pad::Forwards1(Complex *f, Complex *u) { Complex *umstride=u+m*stride; @@ -745,11 +745,11 @@ void fft0pad::Forwards1(Complex *f, Complex *u) fm1stride[i]=temp; } } - + void fft0pad::forwards(Complex *f, Complex *u) { - Forwards0(f); - Forwards1(f,u); + Forwards0(f); + Forwards1(f,u); Forwards->fft(f); Forwards->fft(u); reduce(f,u); @@ -763,7 +763,7 @@ void fft1pad::expand(Complex *f, Complex *u) f[i]=fmstride[i]+2.0*Nyquist; u[i]=fmstride[i] -= Nyquist; } - + Vec Mhalf=LOAD(-0.5); Vec Mhsqrt3=LOAD(-hsqrt3); unsigned int inc=s; @@ -783,7 +783,7 @@ void fft1pad::expand(Complex *f, Complex *u) for(unsigned int i=0; i < M; ++i) { Vec Fa=LOAD(fk+i); Vec Fb=LOAD(fmk+i); - + Vec B=Fa*Mhalf+Fb; STORE(fk+i,Fa+Fb); Fa *= Mhsqrt3; @@ -817,7 +817,7 @@ void fft1pad::reduce(Complex *f, Complex *u) Vec Ninv=LOAD(ninv); Vec Mhalf=LOAD(-0.5); Vec HSqrt3=LOAD(hsqrt3); - + unsigned int inc=s; PARALLEL( for(unsigned int K=0; K < m; K += inc) { @@ -893,7 +893,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C, double *ak=a+k; STORE(ak,LOAD(ak)*LOAD(b+k)*LOAD(C0+k)+ LOAD(a1+k)*LOAD(b1+k)*LOAD(C1+k)); - } + } ); #else PARALLEL( @@ -930,7 +930,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C, double *C0=C[0]; unsigned int stop=twom+offset; #ifdef __SSE2__ - PARALLEL( + PARALLEL( for(unsigned int k=offset; k < stop; k += 2) { double *p=A+k; double *q=B+k; @@ -940,7 +940,7 @@ void ImplicitHTConvolution::mult(double *a, double *b, double **C, sum += LOAD(p+istride)*LOAD(q+istride)*LOAD(C[i]+k); } STORE(p,sum); - } + } ); #else PARALLEL( @@ -965,7 +965,7 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H, { // 8M-3 of 8M FFTs are out-of-place Complex *w=W[0]; - + unsigned int m1=m+1; for(unsigned int i=0; i < M; ++i) { Complex *fi=F[i]+offset; @@ -996,13 +996,13 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H, STORE(vi+k,ZMULT(Zetak,Gk)); STORE(wi+k,ZMULT(Zetak,Hk)); } - } + } ); - + ui[m]=0.0; vi[m]=0.0; wi[m]=0.0; - + if(i+1 < M) { cro->fft(ui,(double *) (ui-m1)); cro->fft(vi,(double *) (vi-m1)); @@ -1012,8 +1012,8 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H, cr->fft(vi); cr->fft(wi); } - } - + } + mult((double *) v,(double *) u,(double **) W); rco->fft((double *) v,u); // v and w are now free @@ -1029,11 +1029,11 @@ void ImplicitHTConvolution::convolve(Complex **F, Complex **G, Complex **H, hi[m]=0.0; cro->fft(hi,(double *) gi); } - + mult((double *) v,(double *) w,(double **) G,2*offset); Complex *f=F[0]+offset; rco->fft((double *) v,f); - + double ninv=0.25/m; Vec Ninv=LOAD(ninv); PARALLEL( @@ -1090,27 +1090,27 @@ void ImplicitHFGGConvolution::convolve(Complex *f, Complex *g, STORE(u+k,ZMULT(Zetak,Fk)); STORE(v+k,ZMULT(Zetak,Gk)); } - } + } ); u[m]=0.0; v[m]=0.0; - + cr->fft(u); cr->fft(v); - + mult((double *) v,(double *) u); rco->fft((double *) v,u); // v is now free g[m]=0.0; cro->fft(g,(double *) v); - + f[m]=0.0; cro->fft(f,(double *) g); - + mult((double *) v,(double *) g); rco->fft((double *) v,f); - + double ninv=0.25/m; Vec Ninv=LOAD(ninv); @@ -1139,10 +1139,10 @@ void ImplicitHFFFConvolution::mult(double *a) double *p=a+k; Vec ak=LOAD(p); STORE(p,ak*ak*ak); - } + } ); } - + void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u) { PARALLEL( @@ -1154,9 +1154,9 @@ void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u) Vec Y=UNPACKH(CONJ(Zeta),Zeta); for(unsigned int k=K; k < stop; ++k) STORE(u+k,ZMULT(ZMULT(X,Y,LOAD(ZetaL0+k)),LOAD(f+k))); - } + } ); - + u[m]=0.0; cr->fft(u); mult((double *) u); @@ -1178,7 +1178,7 @@ void ImplicitHFFFConvolution::convolve(Complex *f, Complex *u) for(unsigned int k=K; k < stop; ++k) { Complex *p=f+k; STORE(p,ZMULTC(ZMULT(X,Y,LOAD(ZetaL0+k)),LOAD(u+k))+Ninv*LOAD(p)); - } + } } ); } @@ -1189,7 +1189,7 @@ void fft0bipad::backwards(Complex *f, Complex *u) f[i]=0.0; for(unsigned int i=0; i < M; ++i) u[i]=0.0; - + unsigned int twom=2*m; PARALLEL( for(unsigned int K=0; K < twom; K += s) { @@ -1208,7 +1208,7 @@ void fft0bipad::backwards(Complex *f, Complex *u) } } ); - + Backwards->fft(f); Backwards->fft(u); } @@ -1251,7 +1251,7 @@ void multautocorrelation(Complex **F, unsigned int m, unsigned int r, unsigned int threads) { Complex* F0=F[0]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1274,7 +1274,7 @@ void multcorrelation(Complex **F, unsigned int m, { Complex* F0=F[0]; Complex* F1=F[1]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1293,7 +1293,7 @@ void multcorrelation(Complex **F, unsigned int m, // Unscramble indices, returning spatial index for remainder r at position j. inline unsigned innerindex(unsigned j, int r) {return 2*j+r;} - + // This multiplication routine is for binary convolutions and takes two inputs // of size m. // F[0][j] *= F[1][j]; @@ -1304,7 +1304,7 @@ void multbinary(Complex **F, unsigned int m, { Complex* F0=F[0]; Complex* F1=F[1]; - + #if 0 // Spatial indices are available, if needed. size_t n=indexsize; for(unsigned int j=0; j < m; ++j) { @@ -1312,8 +1312,8 @@ void multbinary(Complex **F, unsigned int m, cout << index[d] << ","; cout << innerindex(j,r) << endl; } -#endif - +#endif + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1336,7 +1336,7 @@ void multautoconvolution(Complex **F, unsigned int m, unsigned int r, unsigned int threads) { Complex* F0=F[0]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1357,7 +1357,7 @@ inline unsigned innerindex(unsigned j, int r, unsigned int m) { int x=3*j+r; return x >= 0 ? x : 3*m-1; } - + // This multiplication routine is for binary Hermitian convolutions and takes // two inputs. // F[0][j] *= F[1][j]; @@ -1368,7 +1368,7 @@ void multbinary(double **F, unsigned int m, { double* F0=F[0]; double* F1=F[1]; - + #if 0 // Spatial indices are available, if needed. //size_t n=index.size(); size_t n=indexsize; @@ -1378,7 +1378,7 @@ void multbinary(double **F, unsigned int m, cout << innerindex(j,r,m) << endl; } #endif - + #ifdef __SSE2__ unsigned int m1=m-1; PARALLEL( @@ -1407,7 +1407,7 @@ void multbinary2(Complex **F, unsigned int m, Complex* F1=F[1]; Complex* F2=F[2]; Complex* F3=F[3]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1434,7 +1434,7 @@ void multbinary2(double **F, unsigned int m, double* F1=F[1]; double* F2=F[2]; double* F3=F[3]; - + #ifdef __SSE2__ unsigned int m1=m-1; PARALLEL( @@ -1465,7 +1465,7 @@ void multbinary3(Complex **F, unsigned int m, Complex* F3=F[3]; Complex* F4=F[4]; Complex* F5=F[5]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1487,7 +1487,7 @@ void multbinary3(Complex **F, unsigned int m, // F[0][j]=F[0][j]*F[4][j]+F[1][j]*F[5][j]+F[2][j]*F[6][j]+F[3][j]*F[7][j]; void multbinary4(Complex **F, unsigned int m, - const unsigned int indexsize, + const unsigned int indexsize, const unsigned int *index, unsigned int r, unsigned int threads) { @@ -1499,7 +1499,7 @@ void multbinary4(Complex **F, unsigned int m, Complex* F5=F[5]; Complex* F6=F[6]; Complex* F7=F[7]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1543,7 +1543,7 @@ void multbinary8(Complex **F, unsigned int m, Complex* F13=F[13]; Complex* F14=F[14]; Complex* F15=F[15]; - + #ifdef __SSE2__ PARALLEL( for(unsigned int j=0; j < m; ++j) { @@ -1578,7 +1578,7 @@ void multadvection2(double **F, unsigned int m, { double* F0=F[0]; double* F1=F[1]; - + #ifdef __SSE2__ unsigned int m1=m-1; PARALLEL( @@ -1604,7 +1604,7 @@ void multadvection2(double **F, unsigned int m, F0[j]=v*v-u*u; F1[j]=u*v; } -#endif +#endif } } // namespace fftwpp diff --git a/convolution.h b/convolution.h index 7c75f15d..a1e64d4a 100644 --- a/convolution.h +++ b/convolution.h @@ -19,7 +19,7 @@ #include "fftw++.h" #include "cmult-sse2.h" #include "transposeoptions.h" - + namespace fftwpp { #ifndef __convolution_h__ @@ -67,28 +67,28 @@ struct convolveOptions { convolveOptions(unsigned int nx, unsigned int ny, unsigned int stride2, utils::mpiOptions mpi, bool toplevel=true) : nx(nx), ny(ny), stride2(stride2), mpi(mpi), toplevel(toplevel) {} - + convolveOptions(unsigned int ny, unsigned int nz, unsigned int stride2, unsigned int stride3, utils::mpiOptions mpi, bool toplevel=true) : ny(ny), nz(nz), stride2(stride2), stride3(stride3), mpi(mpi), toplevel(toplevel) {} - + convolveOptions(bool toplevel=true) : nx(0), ny(0), nz(0), toplevel(toplevel) {} }; - + static const convolveOptions defaultconvolveOptions; typedef void multiplier(Complex **, unsigned int m, const unsigned int indexsize, const unsigned int *index, - unsigned int r, unsigned int threads); + unsigned int r, unsigned int threads); typedef void realmultiplier(double **, unsigned int m, const unsigned int indexsize, const unsigned int *index, - unsigned int r, unsigned int threads); - + unsigned int r, unsigned int threads); + // Multipliers for binary convolutions. multiplier multautoconvolution; @@ -128,15 +128,15 @@ class ImplicitConvolution : public ThreadBase { unsigned int indexsize; public: unsigned int *index; - + void initpointers(Complex **&U, Complex *u) { unsigned int C=max(A,B); U=new Complex *[C]; - for(unsigned int a=0; a < C; ++a) + for(unsigned int a=0; a < C; ++a) U[a]=u+a*m; pointers=true; } - + void deletepointers(Complex **&U) { delete [] U; } @@ -145,17 +145,17 @@ class ImplicitConvolution : public ThreadBase { indexsize=n; index=i; } - + void init() { indexsize=0; - + Complex* U0=U[0]; Complex* U1=A == 1 ? utils::ComplexAlign(m) : U[1]; - + BackwardsO=new fft1d(m,1,U0,U1); ForwardsO=new fft1d(m,-1,U0,U1); threads=std::min(threads,max(BackwardsO->Threads(),ForwardsO->Threads())); - + if(A == B) { Backwards=new fft1d(m,1,U0); threads=std::min(threads,Backwards->Threads()); @@ -164,12 +164,12 @@ class ImplicitConvolution : public ThreadBase { Forwards=new fft1d(m,-1,U0); threads=std::min(threads,Forwards->Threads()); } - + if(A == 1) utils::deleteAlign(U1); s=BuildZeta(2*m,m,ZetaH,ZetaL,threads); } - + // m is the number of Complex data values. // U is an array of C distinct work arrays each of size m, where C=max(A,B) // A is the number of inputs. @@ -181,7 +181,7 @@ class ImplicitConvolution : public ThreadBase { allocated(false) { init(); } - + // m is the number of Complex data values. // u is a work array of C*m Complex values. // A is the number of inputs. @@ -193,7 +193,7 @@ class ImplicitConvolution : public ThreadBase { initpointers(U,u); init(); } - + // m is the number of Complex data values. // A is the number of inputs. // B is the number of outputs. @@ -205,28 +205,28 @@ class ImplicitConvolution : public ThreadBase { initpointers(U,u); init(); } - + ~ImplicitConvolution() { utils::deleteAlign(ZetaH); utils::deleteAlign(ZetaL); - + if(pointers) deletepointers(U); if(allocated) utils::deleteAlign(u); - + if(A == B) delete Backwards; if(A <= B) delete Forwards; - + delete ForwardsO; - delete BackwardsO; + delete BackwardsO; } // F is an array of A pointers to distinct data blocks each of size m, // shifted by offset (contents not preserved). void convolve(Complex **F, multiplier *pmult, unsigned int i=0, unsigned int offset=0); - + void autoconvolve(Complex *f) { Complex *F[]={f}; convolve(F,multautoconvolution); @@ -248,13 +248,13 @@ class ImplicitConvolution : public ThreadBase { Complex *F[]={f,g}; convolve(F,multcorrelation); } - + template inline void pretransform(Complex **F, unsigned int k, Vec& Zetak); template void pretransform(Complex **F); - + void posttransform(Complex *f, Complex *u); }; @@ -284,25 +284,25 @@ class ImplicitHConvolution : public ThreadBase { unsigned int C=max(A,B); U=new Complex *[C]; unsigned stride=c+1; - for(unsigned int a=0; a < C; ++a) + for(unsigned int a=0; a < C; ++a) U[a]=u+a*stride; pointers=true; } - + void deletepointers(Complex **&U) { delete [] U; } - + void allocateindex(unsigned int n, unsigned int *i) { indexsize=n; index=i; } - + void init() { even=m == 2*c; indexsize=0; Complex* U0=U[0]; - + rc=new rcfft1d(m,U0); cr=new crfft1d(m,U0); @@ -310,7 +310,7 @@ class ImplicitHConvolution : public ThreadBase { rco=new rcfft1d(m,(double *) U0,U1); cro=new crfft1d(m,U1,(double *) U0); if(A == 1) utils::deleteAlign(U1); - + if(A != B) { rcO=rco; crO=cro; @@ -318,12 +318,12 @@ class ImplicitHConvolution : public ThreadBase { rcO=rc; crO=cr; } - + threads=std::min(threads,std::max(rco->Threads(),cro->Threads())); s=BuildZeta(3*m,c+2,ZetaH,ZetaL,threads); w=even ? utils::ComplexAlign(max(A,B)) : u; } - + // m is the number of independent data values // U is an array of max(A,B) distinct work arrays of size c+1, where c=m/2 // A is the number of inputs. @@ -359,7 +359,7 @@ class ImplicitHConvolution : public ThreadBase { ImplicitHConvolution(unsigned int m, bool compact, Complex *u, unsigned int A=2, unsigned int B=1, unsigned int threads=fftw::maxthreads) - : ThreadBase(threads), m(m), c(m/2), compact(compact), A(A), B(B), u(u), + : ThreadBase(threads), m(m), c(m/2), compact(compact), A(A), B(B), u(u), allocated(false) { initpointers(U,u); init(); @@ -381,7 +381,7 @@ class ImplicitHConvolution : public ThreadBase { if(even) utils::deleteAlign(w); utils::deleteAlign(ZetaH); utils::deleteAlign(ZetaL); - + if(pointers) deletepointers(U); if(allocated) utils::deleteAlign(u); @@ -389,14 +389,14 @@ class ImplicitHConvolution : public ThreadBase { delete cro; delete rco; } - + delete cr; delete rc; } - + // F is an array of A pointers to distinct data blocks each of size m, // shifted by offset (contents not preserved). - void convolve(Complex **F, realmultiplier *pmult, unsigned int i=0, + void convolve(Complex **F, realmultiplier *pmult, unsigned int i=0, unsigned int offset=0); void pretransform(Complex *F, Complex *f1c, Complex *U); @@ -408,7 +408,7 @@ class ImplicitHConvolution : public ThreadBase { convolve(F,multbinary); } }; - + // Compute the scrambled implicitly m-padded complex Fourier transform of M // complex vectors, each of length m. @@ -430,36 +430,36 @@ class fftpad { unsigned int s; Complex *ZetaH, *ZetaL; unsigned int threads; -public: - mfft1d *Backwards; +public: + mfft1d *Backwards; mfft1d *Forwards; - + fftpad(unsigned int m, unsigned int M, unsigned int stride, Complex *u=NULL, unsigned int Threads=fftw::maxthreads) : m(m), M(M), stride(stride), threads(Threads) { Backwards=new mfft1d(m,1,M,stride,1,u,NULL,threads); Forwards=new mfft1d(m,-1,M,stride,1,u,NULL,threads); - + threads=std::max(Backwards->Threads(),Forwards->Threads()); - + s=BuildZeta(2*m,m,ZetaH,ZetaL,threads); } - + ~fftpad() { utils::deleteAlign(ZetaL); utils::deleteAlign(ZetaH); delete Forwards; delete Backwards; } - + void expand(Complex *f, Complex *u); void reduce(Complex *f, Complex *u); - + void backwards(Complex *f, Complex *u); void forwards(Complex *f, Complex *u); }; - + // Compute the scrambled implicitly m-padded complex Fourier transform of M // complex vectors, each of length 2m-1 with the origin at index m-1, // containing physical data for wavenumbers -m+1 to m-1. @@ -474,33 +474,33 @@ class fftpad { // stride is the spacing between the elements of each Complex vector. // class fft0pad { -protected: +protected: unsigned int m; unsigned int M; unsigned int s; unsigned int stride; Complex *ZetaH, *ZetaL; unsigned int threads; -public: +public: mfft1d *Forwards; mfft1d *Backwards; - + fft0pad(unsigned int m, unsigned int M, unsigned int stride, Complex *u=NULL, unsigned int Threads=fftw::maxthreads) : m(m), M(M), stride(stride), threads(Threads) { Backwards=new mfft1d(m,1,M,stride,1,u,NULL,threads); Forwards=new mfft1d(m,-1,M,stride,1,u,NULL,threads); - + s=BuildZeta(3*m,m,ZetaH,ZetaL); } - + virtual ~fft0pad() { utils::deleteAlign(ZetaL); utils::deleteAlign(ZetaH); delete Forwards; delete Backwards; } - + // Unscramble indices, returning spatial index stored at position i inline static unsigned findex(unsigned i, unsigned int m) { return i < m-1 ? 3*i : 3*i+4-3*m; // for i >= m-1: j=3*(i-(m-1))+1 @@ -512,15 +512,15 @@ class fft0pad { virtual void expand(Complex *f, Complex *u); virtual void reduce(Complex *f, Complex *u); - + void backwards(Complex *f, Complex *u); virtual void forwards(Complex *f, Complex *u); - + virtual void Backwards1(Complex *f, Complex *u); virtual void Forwards0(Complex *f); virtual void Forwards1(Complex *f, Complex *u); }; - + // Compute the scrambled implicitly m-padded complex Fourier transform of M // complex vectors, each of length 2m with the origin at index m, // corresponding to wavenumbers -m to m-1. @@ -535,30 +535,30 @@ class fft0pad { // stride is the spacing between the elements of each Complex vector. // class fft1pad : public fft0pad { -public: +public: fft1pad(unsigned int m, unsigned int M, unsigned int stride, - Complex *u=NULL, unsigned int threads=fftw::maxthreads) : + Complex *u=NULL, unsigned int threads=fftw::maxthreads) : fft0pad(m,M,stride,u,threads) {} // Unscramble indices, returning spatial index stored at position i inline static unsigned findex(unsigned i, unsigned int m) { return i < m ? 3*i : 3*(i-m)+1; } - + inline static unsigned uindex(unsigned i, unsigned int m) { return i > 0 ? 3*i-1 : 3*m-1; } - + void expand(Complex *f, Complex *u); void reduce(Complex *f, Complex *u); - + void forwards(Complex *f, Complex *u); - + void Backwards1(Complex *f, Complex *u); void Forwards0(Complex *f); void Forwards1(Complex *f, Complex *u); }; - + // In-place implicitly dealiased 2D complex convolution. class ImplicitConvolution2 : public ThreadBase { protected: @@ -572,28 +572,28 @@ class ImplicitConvolution2 : public ThreadBase { bool allocated; unsigned int indexsize; bool toplevel; -public: +public: unsigned int *index; void initpointers2(Complex **&U2, Complex *u2, unsigned int stride) { U2=new Complex *[A]; for(unsigned int a=0; a < A; ++a) U2[a]=u2+a*stride; - + if(toplevel) allocateindex(1,new unsigned int[1]); } - + void deletepointers2(Complex **&U2) { if(toplevel) { delete [] index; - + for(unsigned int t=1; t < threads; ++t) delete [] yconvolve[t]->index; } delete [] U2; } - + void allocateindex(unsigned int n, unsigned int *i) { indexsize=n; index=i; @@ -601,7 +601,7 @@ class ImplicitConvolution2 : public ThreadBase { for(unsigned int t=1; t < threads; ++t) yconvolve[t]->allocateindex(n,new unsigned int[n]); } - + void init(const convolveOptions& options) { toplevel=options.toplevel; xfftpad=new fftpad(mx,options.ny,options.ny,u2,threads); @@ -611,7 +611,7 @@ class ImplicitConvolution2 : public ThreadBase { yconvolve[t]=new ImplicitConvolution(my,u1+t*my*C,A,B,innerthreads); initpointers2(U2,u2,options.stride2); } - + void set(convolveOptions& options) { if(options.nx == 0) options.nx=mx; if(options.ny == 0) { @@ -619,7 +619,7 @@ class ImplicitConvolution2 : public ThreadBase { options.stride2=mx*my; } } - + // u1 is a temporary array of size my*C*threads. // u2 is a temporary array of size mx*my*C. // A is the number of inputs. @@ -636,7 +636,7 @@ class ImplicitConvolution2 : public ThreadBase { multithread(options.nx); init(options); } - + ImplicitConvolution2(unsigned int mx, unsigned int my, unsigned int A=2, unsigned int B=1, unsigned int threads=fftw::maxthreads, @@ -649,34 +649,34 @@ class ImplicitConvolution2 : public ThreadBase { u2=utils::ComplexAlign(options.stride2*C); init(options); } - + virtual ~ImplicitConvolution2() { deletepointers2(U2); - + for(unsigned int t=0; t < threads; ++t) delete yconvolve[t]; delete [] yconvolve; - + delete xfftpad; - + if(allocated) { utils::deleteAlign(u2); utils::deleteAlign(u1); } } - + void backwards(Complex **F, Complex **U2, unsigned int offset) { for(unsigned int a=0; a < A; ++a) xfftpad->backwards(F[a]+offset,U2[a]); } - void subconvolution(Complex **F, multiplier *pmult, + void subconvolution(Complex **F, multiplier *pmult, unsigned int r, unsigned int M, unsigned int stride, unsigned int offset=0) { if(threads > 1) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < M; ++i) yconvolve[get_thread_num()]->convolve(F,pmult,2*i+r,offset+i*stride); } else { @@ -685,12 +685,12 @@ class ImplicitConvolution2 : public ThreadBase { yconvolve0->convolve(F,pmult,2*i+r,offset+i*stride); } } - + void forwards(Complex **F, Complex **U2, unsigned int offset) { for(unsigned int b=0; b < B; ++b) xfftpad->forwards(F[b]+offset,U2[b]); } - + // F is a pointer to A distinct data blocks each of size mx*my, // shifted by offset (contents not preserved). virtual void convolve(Complex **F, multiplier *pmult, unsigned int i=0, @@ -710,7 +710,7 @@ class ImplicitConvolution2 : public ThreadBase { subconvolution(U2,pmult,1,mx,my); forwards(F,U2,offset); } - + // Binary convolution: void convolve(Complex *f, Complex *g) { Complex *F[]={f,g}; @@ -755,12 +755,12 @@ inline void HermitianSymmetrizeXY(unsigned int mx, unsigned int my, int mxstride=mx*stride; unsigned int myz=my*mz; unsigned int origin=xorigin*stride+yorigin*mz; - + f[origin].im=0.0; for(int i=stride; i < mxstride; i += stride) f[origin-i]=conj(f[origin+i]); - + PARALLEL( for(int i=stride-mxstride; i < mxstride; i += stride) { int stop=i+myz; @@ -789,27 +789,27 @@ class ImplicitHConvolution2 : public ThreadBase { public: unsigned int *index; - void initpointers2(Complex **&U2, Complex *u2, unsigned int stride) + void initpointers2(Complex **&U2, Complex *u2, unsigned int stride) { unsigned int C=max(A,B); U2=new Complex *[C]; for(unsigned int a=0; a < C; ++a) U2[a]=u2+a*stride; - + if(toplevel) allocateindex(1,new unsigned int[1]); } - + void deletepointers2(Complex **&U2) { if(toplevel) { delete [] index; - + for(unsigned int t=1; t < threads; ++t) delete [] yconvolve[t]->index; } delete [] U2; } - + void allocateindex(unsigned int n, unsigned int *i) { indexsize=n; index=i; @@ -817,13 +817,13 @@ class ImplicitHConvolution2 : public ThreadBase { for(unsigned int t=1; t < threads; ++t) yconvolve[t]->allocateindex(n,new unsigned int[n]); } - + void init(const convolveOptions& options) { unsigned int C=max(A,B); toplevel=options.toplevel; xfftpad=xcompact ? new fft0pad(mx,options.ny,options.ny,u2) : new fft1pad(mx,options.ny,options.ny,u2); - + yconvolve=new ImplicitHConvolution*[threads]; for(unsigned int t=0; t < threads; ++t) yconvolve[t]=new ImplicitHConvolution(my,ycompact,u1+t*(my/2+1)*C,A,B, @@ -838,7 +838,7 @@ class ImplicitHConvolution2 : public ThreadBase { options.stride2=(mx+xcompact)*options.ny; } } - + // u1 is a temporary array of size (my/2+1)*C*threads. // u2 is a temporary array of size (mx+xcompact)*(my+!ycompact)*C; // A is the number of inputs. @@ -855,21 +855,21 @@ class ImplicitHConvolution2 : public ThreadBase { multithread(options.nx); init(options); } - + ImplicitHConvolution2(unsigned int mx, unsigned int my, bool xcompact, bool ycompact, Complex *u1, Complex *u2, unsigned int A=2, unsigned int B=1, unsigned int threads=fftw::maxthreads, convolveOptions options=defaultconvolveOptions) : - ThreadBase(threads), mx(mx), my(my), + ThreadBase(threads), mx(mx), my(my), xcompact(xcompact), ycompact(ycompact), u1(u1), u2(u2), A(A), B(B), allocated(false) { set(options); multithread(options.nx); init(options); } - + ImplicitHConvolution2(unsigned int mx, unsigned int my, bool xcompact=true, bool ycompact=true, unsigned int A=2, unsigned int B=1, @@ -884,16 +884,16 @@ class ImplicitHConvolution2 : public ThreadBase { u2=utils::ComplexAlign(options.stride2*C); init(options); } - + virtual ~ImplicitHConvolution2() { deletepointers2(U2); - + for(unsigned int t=0; t < threads; ++t) delete yconvolve[t]; delete [] yconvolve; - + delete xfftpad; - + if(allocated) { utils::deleteAlign(u2); utils::deleteAlign(u1); @@ -917,7 +917,7 @@ class ImplicitHConvolution2 : public ThreadBase { if(threads > 1) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < M; ++i) yconvolve[get_thread_num()]->convolve(F,pmult,indexfunction(i,mx), offset+i*stride); @@ -926,14 +926,14 @@ class ImplicitHConvolution2 : public ThreadBase { for(unsigned int i=0; i < M; ++i) yconvolve0->convolve(F,pmult,indexfunction(i,mx),offset+i*stride); } - } - + } + void forwards(Complex **F, Complex **U2, unsigned int offset) { for(unsigned int b=0; b < B; ++b) xfftpad->forwards(F[b]+offset,U2[b]); } - - // F is a pointer to A distinct data blocks each of size + + // F is a pointer to A distinct data blocks each of size // (2mx-xcompact)*(my+!ycompact), shifted by offset (contents not preserved). virtual void convolve(Complex **F, realmultiplier *pmult, bool symmetrize=true, unsigned int i=0, @@ -954,14 +954,14 @@ class ImplicitHConvolution2 : public ThreadBase { subconvolution(U2,pmult,xfftpad->uindex,mx+xcompact,stride); forwards(F,U2,offset); } - + // Binary convolution: void convolve(Complex *f, Complex *g, bool symmetrize=true) { Complex *F[]={f,g}; convolve(F,multbinary,symmetrize); } }; - + // In-place implicitly dealiased 3D complex convolution. class ImplicitConvolution3 : public ThreadBase { protected: @@ -976,7 +976,7 @@ class ImplicitConvolution3 : public ThreadBase { bool allocated; unsigned int indexsize; bool toplevel; -public: +public: unsigned int *index; void initpointers3(Complex **&U3, Complex *u3, unsigned int stride) { @@ -984,21 +984,21 @@ class ImplicitConvolution3 : public ThreadBase { U3=new Complex *[C]; for(unsigned int a=0; a < C; ++a) U3[a]=u3+a*stride; - + if(toplevel) allocateindex(2,new unsigned int[2]); } - + void deletepointers3(Complex **&U3) { if(toplevel) { delete [] index; - + for(unsigned int t=1; t < threads; ++t) delete [] yzconvolve[t]->index; } - + delete [] U3; } - + void allocateindex(unsigned int n, unsigned int *i) { indexsize=n; index=i; @@ -1006,12 +1006,12 @@ class ImplicitConvolution3 : public ThreadBase { for(unsigned int t=1; t < threads; ++t) yzconvolve[t]->allocateindex(n,new unsigned int[n]); } - + void init(const convolveOptions& options) { toplevel=options.toplevel; unsigned int nyz=options.ny*options.nz; xfftpad=new fftpad(mx,nyz,nyz,u3,threads); - + if(options.nz == mz) { unsigned int C=max(A,B); yzconvolve=new ImplicitConvolution2*[threads]; @@ -1022,7 +1022,7 @@ class ImplicitConvolution3 : public ThreadBase { initpointers3(U3,u3,options.stride3); } else yzconvolve=NULL; } - + void set(convolveOptions &options) { if(options.ny == 0) { @@ -1032,7 +1032,7 @@ class ImplicitConvolution3 : public ThreadBase { options.stride3=mx*my*mz; } } - + // u1 is a temporary array of size mz*C*threads. // u2 is a temporary array of size my*mz*C*threads. // u3 is a temporary array of size mx*my*mz*C. @@ -1041,7 +1041,7 @@ class ImplicitConvolution3 : public ThreadBase { // Here C=max(A,B). ImplicitConvolution3(unsigned int mx, unsigned int my, unsigned int mz, Complex *u1, Complex *u2, Complex *u3, - unsigned int A=2, unsigned int B=1, + unsigned int A=2, unsigned int B=1, unsigned int threads=fftw::maxthreads, convolveOptions options=defaultconvolveOptions) : ThreadBase(threads), mx(mx), my(my), mz(mz), @@ -1052,7 +1052,7 @@ class ImplicitConvolution3 : public ThreadBase { } ImplicitConvolution3(unsigned int mx, unsigned int my, unsigned int mz, - unsigned int A=2, unsigned int B=1, + unsigned int A=2, unsigned int B=1, unsigned int threads=fftw::maxthreads, convolveOptions options=defaultconvolveOptions) : ThreadBase(threads), mx(mx), my(my), mz(mz), A(A), B(B), @@ -1065,7 +1065,7 @@ class ImplicitConvolution3 : public ThreadBase { u3=utils::ComplexAlign(options.stride3*C); init(options); } - + virtual ~ImplicitConvolution3() { if(yzconvolve) { deletepointers3(U3); @@ -1074,28 +1074,28 @@ class ImplicitConvolution3 : public ThreadBase { delete yzconvolve[t]; delete [] yzconvolve; } - + delete xfftpad; - + if(allocated) { utils::deleteAlign(u3); utils::deleteAlign(u2); utils::deleteAlign(u1); } } - + void backwards(Complex **F, Complex **U3, unsigned int offset) { for(unsigned int a=0; a < A; ++a) xfftpad->backwards(F[a]+offset,U3[a]); } - void subconvolution(Complex **F, multiplier *pmult, + void subconvolution(Complex **F, multiplier *pmult, unsigned int r, unsigned int M, unsigned int stride, unsigned int offset=0) { if(threads > 1) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < M; ++i) yzconvolve[get_thread_num()]->convolve(F,pmult,2*i+r,offset+i*stride); } else { @@ -1105,12 +1105,12 @@ class ImplicitConvolution3 : public ThreadBase { } } } - + void forwards(Complex **F, Complex **U3, unsigned int offset=0) { for(unsigned int b=0; b < B; ++b) xfftpad->forwards(F[b]+offset,U3[b]); } - + // F is a pointer to A distinct data blocks each of size mx*my*mz, // shifted by offset virtual void convolve(Complex **F, multiplier *pmult, unsigned int i=0, @@ -1132,7 +1132,7 @@ class ImplicitConvolution3 : public ThreadBase { subconvolution(U3,pmult,1,mx,stride); forwards(F,U3,offset); } - + // Binary convolution: void convolve(Complex *f, Complex *g) { Complex *F[]={f,g}; @@ -1171,29 +1171,29 @@ class ImplicitHConvolution3 : public ThreadBase { bool allocated; unsigned int indexsize; bool toplevel; -public: +public: unsigned int *index; - + void initpointers3(Complex **&U3, Complex *u3, unsigned int stride) { unsigned int C=max(A,B); U3=new Complex *[C]; for(unsigned int a=0; a < C; ++a) U3[a]=u3+a*stride; - + if(toplevel) allocateindex(2,new unsigned int[2]); } - + void deletepointers3(Complex **&U3) { if(toplevel) { delete [] index; - + for(unsigned int t=1; t < threads; ++t) delete [] yzconvolve[t]->index; } - + delete [] U3; } - + void allocateindex(unsigned int n, unsigned int *i) { indexsize=n; index=i; @@ -1201,14 +1201,14 @@ class ImplicitHConvolution3 : public ThreadBase { for(unsigned int t=1; t < threads; ++t) yzconvolve[t]->allocateindex(n,new unsigned int[n]); } - + void init(const convolveOptions& options) { toplevel=options.toplevel; unsigned int nyz=options.ny*options.nz; xfftpad=xcompact ? new fft0pad(mx,nyz,nyz,u3) : new fft1pad(mx,nyz,nyz,u3); - if(options.nz == mz+!zcompact) { + if(options.nz == mz+!zcompact) { unsigned int C=max(A,B); yzconvolve=new ImplicitHConvolution2*[threads]; for(unsigned int t=0; t < threads; ++t) @@ -1220,7 +1220,7 @@ class ImplicitHConvolution3 : public ThreadBase { initpointers3(U3,u3,options.stride3); } else yzconvolve=NULL; } - + void set(convolveOptions& options) { if(options.ny == 0) { options.ny=2*my-ycompact; @@ -1229,10 +1229,10 @@ class ImplicitHConvolution3 : public ThreadBase { options.stride3=(mx+xcompact)*options.ny*options.nz; } } - + // u1 is a temporary array of size (mz/2+1)*C*threads. // u2 is a temporary array of size (my+ycompact)*(mz+!zcompact)*C*threads. - // u3 is a temporary array of size + // u3 is a temporary array of size // (mx+xcompact)*(2my-ycompact)*(mz+!zcompact)*C. // A is the number of inputs. // B is the number of outputs. @@ -1250,7 +1250,7 @@ class ImplicitHConvolution3 : public ThreadBase { multithread(mx); init(options); } - + ImplicitHConvolution3(unsigned int mx, unsigned int my, unsigned int mz, bool xcompact, bool ycompact, bool zcompact, Complex *u1, Complex *u2, Complex *u3, @@ -1258,13 +1258,13 @@ class ImplicitHConvolution3 : public ThreadBase { unsigned int threads=fftw::maxthreads, convolveOptions options=defaultconvolveOptions) : ThreadBase(threads), mx(mx), my(my), mz(mz), - xcompact(xcompact), ycompact(ycompact), zcompact(zcompact), + xcompact(xcompact), ycompact(ycompact), zcompact(zcompact), u1(u1), u2(u2), u3(u3), A(A), B(B), allocated(false) { set(options); multithread(mx); init(options); } - + ImplicitHConvolution3(unsigned int mx, unsigned int my, unsigned int mz, bool xcompact=true, bool ycompact=true, bool zcompact=true, @@ -1282,31 +1282,31 @@ class ImplicitHConvolution3 : public ThreadBase { u3=utils::ComplexAlign(options.stride3*C); init(options); } - + virtual ~ImplicitHConvolution3() { if(yzconvolve) { deletepointers3(U3); - + for(unsigned int t=0; t < threads; ++t) delete yzconvolve[t]; delete [] yzconvolve; } delete xfftpad; - + if(allocated) { utils::deleteAlign(u3); utils::deleteAlign(u2); utils::deleteAlign(u1); } } - + virtual void HermitianSymmetrize(Complex *f, Complex *u) - { + { HermitianSymmetrizeXY(mx,my,mz+!zcompact,mx-xcompact,my-ycompact,f, threads); } - + void backwards(Complex **F, Complex **U3, bool symmetrize, unsigned int offset) { for(unsigned int a=0; a < A; ++a) { @@ -1325,7 +1325,7 @@ class ImplicitHConvolution3 : public ThreadBase { if(threads > 1) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < M; ++i) yzconvolve[get_thread_num()]->convolve(F,pmult,false, indexfunction(i,mx), @@ -1342,9 +1342,9 @@ class ImplicitHConvolution3 : public ThreadBase { for(unsigned int b=0; b < B; ++b) xfftpad->forwards(F[b]+offset,U3[b]); } - + // F is a pointer to A distinct data blocks each of size - // (2mx-compact)*(2my-ycompact)*(mz+!zcompact), shifted by offset + // (2mx-compact)*(2my-ycompact)*(mz+!zcompact), shifted by offset // (contents not preserved). virtual void convolve(Complex **F, realmultiplier *pmult, bool symmetrize=true, unsigned int i=0, @@ -1358,14 +1358,14 @@ class ImplicitHConvolution3 : public ThreadBase { Index[i]=index[i]; } } - } + } unsigned int stride=(2*my-ycompact)*(mz+!zcompact); backwards(F,U3,symmetrize,offset); subconvolution(F,pmult,xfftpad->findex,2*mx-xcompact,stride,offset); subconvolution(U3,pmult,xfftpad->uindex,mx+xcompact,stride); forwards(F,U3,offset); } - + // Binary convolution: void convolve(Complex *f, Complex *g, bool symmetrize=true) { Complex *F[]={f,g}; @@ -1387,51 +1387,51 @@ class ImplicitHTConvolution : public ThreadBase { bool allocated; unsigned int twom; unsigned int stride; -public: +public: void initpointers(Complex **&W, Complex *w) { W=new Complex *[M]; unsigned int m1=m+1; - for(unsigned int s=0; s < M; ++s) + for(unsigned int s=0; s < M; ++s) W[s]=w+s*m1; } - + void deletepointers(Complex **&W) { delete [] W; } - + void init() { twom=2*m; stride=twom+2; - + rc=new rcfft1d(twom,u); cr=new crfft1d(twom,u); - + rco=new rcfft1d(twom,(double *) u,v); cro=new crfft1d(twom,v,(double *) u); - + threads=std::min(threads,std::max(rco->Threads(),cro->Threads())); - + s=BuildZeta(4*m,m,ZetaH,ZetaL,threads); - + initpointers(W,w); } - + // u, v, and w are distinct temporary arrays each of size (m+1)*M. ImplicitHTConvolution(unsigned int m, Complex *u, Complex *v, Complex *w, unsigned int M=1) : m(m), u(u), v(v), w(w), M(M), allocated(false) { init(); } - - ImplicitHTConvolution(unsigned int m, unsigned int M=1) : + + ImplicitHTConvolution(unsigned int m, unsigned int M=1) : m(m), u(utils::ComplexAlign(m*M+M)), v(utils::ComplexAlign(m*M+M)), w(utils::ComplexAlign(m*M+M)), M(M), allocated(true) { init(); } - + ~ImplicitHTConvolution() { deletepointers(W); - + if(allocated) { utils::deleteAlign(w); utils::deleteAlign(v); @@ -1444,20 +1444,20 @@ class ImplicitHTConvolution : public ThreadBase { delete cr; delete rc; } - + void mult(double *a, double *b, double **C, unsigned int offset=0); - - void convolve(Complex **F, Complex **G, Complex **H, + + void convolve(Complex **F, Complex **G, Complex **H, Complex *u, Complex *v, Complex **W, unsigned int offset=0); - + // F, G, and H are distinct pointers to M distinct data blocks each of size // m+1, shifted by offset (contents not preserved). // The output is returned in F[0]. void convolve(Complex **F, Complex **G, Complex **H, unsigned int offset=0) { convolve(F,G,H,u,v,W,offset); } - + // Constructor for special case M=1: void convolve(Complex *f, Complex *g, Complex *h) { convolve(&f,&g,&h); @@ -1477,34 +1477,34 @@ class ImplicitHFGGConvolution : public ThreadBase { bool allocated; unsigned int twom; unsigned int stride; -public: +public: void init() { twom=2*m; stride=twom+2; - + rc=new rcfft1d(twom,u); cr=new crfft1d(twom,u); - + rco=new rcfft1d(twom,(double *) u,v); cro=new crfft1d(twom,v,(double *) u); - + threads=std::min(threads,std::max(rco->Threads(),cro->Threads())); - + s=BuildZeta(4*m,m,ZetaH,ZetaL,threads); } - + // u and v are distinct temporary arrays each of size m+1. ImplicitHFGGConvolution(unsigned int m, Complex *u, Complex *v) : m(m), u(u), v(v), allocated(false) { init(); } - - ImplicitHFGGConvolution(unsigned int m) : + + ImplicitHFGGConvolution(unsigned int m) : m(m), u(utils::ComplexAlign(m+1)), v(utils::ComplexAlign(m+1)), allocated(true) { init(); } - + ~ImplicitHFGGConvolution() { if(allocated) { utils::deleteAlign(v); @@ -1517,11 +1517,11 @@ class ImplicitHFGGConvolution : public ThreadBase { delete cr; delete rc; } - + void mult(double *a, double *b); - + void convolve(Complex *f, Complex *g, Complex *u, Complex *v); - + // f and g are distinct pointers to data of size m+1 (contents not // preserved). The output is returned in f. void convolve(Complex *f, Complex *g) { @@ -1544,40 +1544,40 @@ class ImplicitHFFFConvolution : public ThreadBase { unsigned int stride; public: void mult(double *a); - + void init() { twom=2*m; stride=twom+2; - + rc=new rcfft1d(twom,u); cr=new crfft1d(twom,u); - + threads=std::min(threads,std::max(rc->Threads(),cr->Threads())); - + s=BuildZeta(4*m,m,ZetaH,ZetaL,threads); } - + // u is a distinct temporary array of size m+1. ImplicitHFFFConvolution(unsigned int m, Complex *u) : m(m), u(u), allocated(false) { init(); } - + ImplicitHFFFConvolution(unsigned int m) : m(m), u(utils::ComplexAlign(m+1)), allocated(true) { init(); } - + ~ImplicitHFFFConvolution() { if(allocated) utils::deleteAlign(u); - + utils::deleteAlign(ZetaL); utils::deleteAlign(ZetaH); delete cr; delete rc; } - + void convolve(Complex *f, Complex *u); // f is a pointer to data of size m+1 (contents not preserved). // The output is returned in f. @@ -1585,7 +1585,7 @@ class ImplicitHFFFConvolution : public ThreadBase { convolve(f,u); } }; - + // Compute the scrambled implicitly 2m-padded complex Fourier transform of M // complex vectors, each of length 2m with the Fourier origin at index m. // The arrays in and out (which may coincide), along @@ -1607,27 +1607,27 @@ class fft0bipad { mfft1d *Forwards; Complex *ZetaH, *ZetaL; unsigned int threads; -public: +public: fft0bipad(unsigned int m, unsigned int M, unsigned int stride, - Complex *f, unsigned int Threads=fftw::maxthreads) : + Complex *f, unsigned int Threads=fftw::maxthreads) : m(m), M(M), stride(stride), threads(Threads) { unsigned int twom=2*m; Backwards=new mfft1d(twom,1,M,stride,1,f,NULL,threads); Forwards=new mfft1d(twom,-1,M,stride,1,f,NULL,threads); - + threads=std::min(threads, std::max(Backwards->Threads(),Forwards->Threads())); - + s=BuildZeta(4*m,twom,ZetaH,ZetaL,threads); } - + ~fft0bipad() { utils::deleteAlign(ZetaL); utils::deleteAlign(ZetaH); delete Forwards; delete Backwards; } - + void backwards(Complex *f, Complex *u); void forwards(Complex *f, Complex *u); }; @@ -1645,7 +1645,7 @@ class ImplicitHTConvolution2 : public ThreadBase { bool allocated; Complex **u,**v; Complex ***W; -public: +public: void initpointers(Complex **&u, Complex **&v, Complex ***&W, unsigned int threads) { u=new Complex *[threads]; @@ -1660,7 +1660,7 @@ class ImplicitHTConvolution2 : public ThreadBase { yconvolve->initpointers(W[i],wi); } } - + void deletepointers(Complex **&u, Complex **&v, Complex ***&W, unsigned int threads) { for(unsigned int i=0; i < threads; ++i) @@ -1669,7 +1669,7 @@ class ImplicitHTConvolution2 : public ThreadBase { delete [] v; delete [] u; } - + void initpointers(Complex **&U2, Complex **&V2, Complex **&W2, Complex *u2, Complex *v2, Complex *w2) { U2=new Complex *[M]; @@ -1683,29 +1683,29 @@ class ImplicitHTConvolution2 : public ThreadBase { W2[s]=w2+smu; } } - + void deletepointers(Complex **&U2, Complex **&V2, Complex **&W2) { delete [] W2; delete [] V2; delete [] U2; } - + void init() { xfftpad=new fft0bipad(mx,my,my+1,u2,threads); - + yconvolve=new ImplicitHTConvolution(my,u1,v1,w1,M); yconvolve->Threads(1); initpointers(u,v,W,threads); initpointers(U2,V2,W2,u2,v2,w2); } - + // u1, v1, and w1 are temporary arrays of size (my+1)*M*threads; // u2, v2, and w2 are temporary arrays of size 2mx*(my+1)*M. // M is the number of data blocks (each corresponding to a dot product term). // threads is the number of threads to use in the outer subconvolution loop. ImplicitHTConvolution2(unsigned int mx, unsigned int my, - Complex *u1, Complex *v1, Complex *w1, + Complex *u1, Complex *v1, Complex *w1, Complex *u2, Complex *v2, Complex *w2, unsigned int M=1, unsigned int threads=fftw::maxthreads) : @@ -1713,7 +1713,7 @@ class ImplicitHTConvolution2 : public ThreadBase { u2(u2), v2(v2), w2(w2), M(M), allocated(false) { init(); } - + ImplicitHTConvolution2(unsigned int mx, unsigned int my, unsigned int M=1, unsigned int threads=fftw::maxthreads) : @@ -1727,14 +1727,14 @@ class ImplicitHTConvolution2 : public ThreadBase { M(M), allocated(true) { init(); } - + ~ImplicitHTConvolution2() { deletepointers(U2,V2,W2); deletepointers(u,v,W,threads); - + delete yconvolve; delete xfftpad; - + if(allocated) { utils::deleteAlign(w2); utils::deleteAlign(v2); @@ -1744,32 +1744,32 @@ class ImplicitHTConvolution2 : public ThreadBase { utils::deleteAlign(u1); } } - - void convolve(Complex **F, Complex **G, Complex **H, - Complex **u, Complex **v, Complex ***W, + + void convolve(Complex **F, Complex **G, Complex **H, + Complex **u, Complex **v, Complex ***W, Complex **U2, Complex **V2, Complex **W2, bool symmetrize=true, unsigned int offset=0) { Complex *u2=U2[0]; Complex *v2=V2[0]; Complex *w2=W2[0]; - + unsigned int my1=my+1; unsigned int mu=2*mx*my1; - + for(unsigned int s=0; s < M; ++s) { Complex *f=F[s]+offset; if(symmetrize) HermitianSymmetrizeX(mx,my1,mx,f); xfftpad->backwards(f,u2+s*mu); } - + for(unsigned int s=0; s < M; ++s) { Complex *g=G[s]+offset; if(symmetrize) HermitianSymmetrizeX(mx,my1,mx,g); xfftpad->backwards(g,v2+s*mu); } - + for(unsigned int s=0; s < M; ++s) { Complex *h=H[s]+offset; if(symmetrize) @@ -1779,15 +1779,15 @@ class ImplicitHTConvolution2 : public ThreadBase { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) { unsigned int thread=get_thread_num(); yconvolve->convolve(F,G,H,u[thread],v[thread],W[thread],i+offset); } - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) { unsigned int thread=get_thread_num(); yconvolve->convolve(U2,V2,W2,u[thread],v[thread],W[thread],i+offset); @@ -1795,7 +1795,7 @@ class ImplicitHTConvolution2 : public ThreadBase { xfftpad->forwards(F[0]+offset,u2); } - + // F, G, and H are distinct pointers to M distinct data blocks each of size // 2mx*(my+1), shifted by offset (contents not preserved). // The output is returned in F[0]. @@ -1821,7 +1821,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase { ImplicitHFGGConvolution *yconvolve; bool allocated; Complex **u,**v; -public: +public: void initpointers(Complex **&u, Complex **&v, unsigned int threads) { u=new Complex *[threads]; v=new Complex *[threads]; @@ -1832,21 +1832,21 @@ class ImplicitHFGGConvolution2 : public ThreadBase { v[i]=v1+imy1; } } - + void deletepointers(Complex **&u, Complex **&v) { delete [] v; delete [] u; } - + void init() { xfftpad=new fft0bipad(mx,my,my+1,u2,threads); - + yconvolve=new ImplicitHFGGConvolution(my,u1,v1); yconvolve->Threads(1); initpointers(u,v,threads); } - + // u1 and v1 are temporary arrays of size (my+1)*threads. // u2 and v2 are temporary arrays of size 2mx*(my+1). // threads is the number of threads to use in the outer subconvolution loop. @@ -1858,7 +1858,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase { allocated(false) { init(); } - + ImplicitHFGGConvolution2(unsigned int mx, unsigned int my, unsigned int threads=fftw::maxthreads) : ThreadBase(threads), mx(mx), my(my), @@ -1869,13 +1869,13 @@ class ImplicitHFGGConvolution2 : public ThreadBase { allocated(true) { init(); } - + ~ImplicitHFGGConvolution2() { deletepointers(u,v); - + delete yconvolve; delete xfftpad; - + if(allocated) { utils::deleteAlign(v2); utils::deleteAlign(u2); @@ -1883,32 +1883,32 @@ class ImplicitHFGGConvolution2 : public ThreadBase { utils::deleteAlign(u1); } } - + void convolve(Complex *f, Complex *g, Complex **u, Complex **v, Complex *u2, Complex *v2, bool symmetrize=true) { unsigned int my1=my+1; unsigned int mu=2*mx*my1; - + if(symmetrize) HermitianSymmetrizeX(mx,my1,mx,f); xfftpad->backwards(f,u2); - + if(symmetrize) HermitianSymmetrizeX(mx,my1,mx,g); xfftpad->backwards(g,v2); - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) { unsigned int thread=get_thread_num(); yconvolve->convolve(f+i,g+i,u[thread],v[thread]); } - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) { unsigned int thread=get_thread_num(); yconvolve->convolve(u2+i,v2+i,u[thread],v[thread]); @@ -1916,7 +1916,7 @@ class ImplicitHFGGConvolution2 : public ThreadBase { xfftpad->forwards(f,u2); } - + void convolve(Complex *f, Complex *g, bool symmetrize=true) { convolve(f,g,u,v,u2,v2,symmetrize); } @@ -1933,26 +1933,26 @@ class ImplicitHFFFConvolution2 : public ThreadBase { ImplicitHFFFConvolution *yconvolve; bool allocated; Complex **u; -public: +public: void initpointers(Complex **&u, unsigned int threads) { u=new Complex *[threads]; unsigned int my1=my+1; for(unsigned int i=0; i < threads; ++i) u[i]=u1+i*my1; } - + void deletepointers(Complex **&u) { delete [] u; } - + void init() { xfftpad=new fft0bipad(mx,my,my+1,u2,threads); - + yconvolve=new ImplicitHFFFConvolution(my,u1); yconvolve->Threads(1); initpointers(u,threads); } - + // u1 is a temporary array of size (my+1)*threads. // u2 is a temporary array of size 2mx*(my+1). // threads is the number of threads to use in the outer subconvolution loop. @@ -1963,7 +1963,7 @@ class ImplicitHFFFConvolution2 : public ThreadBase { u1(u1), u2(u2), allocated(false) { init(); } - + ImplicitHFFFConvolution2(unsigned int mx, unsigned int my, unsigned int threads=fftw::maxthreads) : ThreadBase(threads), mx(mx), my(my), @@ -1972,41 +1972,41 @@ class ImplicitHFFFConvolution2 : public ThreadBase { allocated(true) { init(); } - + ~ImplicitHFFFConvolution2() { deletepointers(u); delete yconvolve; delete xfftpad; - + if(allocated) { utils::deleteAlign(u2); utils::deleteAlign(u1); } } - + void convolve(Complex *f, Complex **u, Complex *u2, bool symmetrize=true) { unsigned int my1=my+1; unsigned int mu=2*mx*my1; - + if(symmetrize) HermitianSymmetrizeX(mx,my1,mx,f); xfftpad->backwards(f,u2); - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) yconvolve->convolve(f+i,u[get_thread_num()]); - + #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) -#endif +#endif for(unsigned int i=0; i < mu; i += my1) yconvolve->convolve(u2+i,u[get_thread_num()]); xfftpad->forwards(f,u2); } - + void convolve(Complex *f, bool symmetrize=true) { convolve(f,u,u2,symmetrize); } diff --git a/examples/example0.cc b/examples/example0.cc index 06a3d304..9e273f15 100644 --- a/examples/example0.cc +++ b/examples/example0.cc @@ -10,33 +10,33 @@ using namespace fftwpp; int main() { fftw::maxthreads=get_max_threads(); - - cout << "1D complex to complex in-place FFT, not using the Array class" + + cout << "1D complex to complex in-place FFT, not using the Array class" << endl; // cout << endl << "FFTW requires memory alignment of " << fftw_alignment() -// << "." << endl; - - unsigned int n=4; +// << "." << endl; + + unsigned int n=4; Complex *f=ComplexAlign(n); - + fft1d Forward(n,-1); fft1d Backward(n,1); - + for(unsigned int i=0; i < n; i++) f[i]=i; cout << "\ninput:" << endl; - for(unsigned int i=0; i < n; i++) cout << f[i] << endl; + for(unsigned int i=0; i < n; i++) cout << f[i] << endl; Forward.fft(f); cout << "\noutput:" << endl; - for(unsigned int i=0; i < n; i++) cout << f[i] << endl; + for(unsigned int i=0; i < n; i++) cout << f[i] << endl; Backward.fftNormalized(f); cout << "\ntransformed back:" << endl; for(unsigned int i=0; i < n; i++) cout << f[i] << endl; - + deleteAlign(f); } diff --git a/examples/example0m.cc b/examples/example0m.cc index f2e65df1..a26d0df2 100644 --- a/examples/example0m.cc +++ b/examples/example0m.cc @@ -10,24 +10,24 @@ using namespace utils; using namespace fftwpp; int main() -{ +{ cout << "Multiple 1D complex to complex in-place FFTs" << endl; fftw::maxthreads=4; - + unsigned int M=10; // Number of FFTs performed unsigned int n=8; // Length of FFT unsigned int N=10; // Number data points for timing test. Complex *f=ComplexAlign(n*M); - + mfft1d Forward(n,-1,M,M,1); mfft1d Backward(n,1,M,M,1); for(unsigned int i=0; i < n*M; i++) f[i]=i; unsigned int outlimit=100; - + if(n*M < outlimit) { cout << endl << "input:" << endl; for(unsigned int i=0; i < n; i++) { @@ -42,14 +42,14 @@ int main() // Timing test: seconds(); - + for(unsigned int j=0; j < N; ++j) { Forward.fft(f); Backward.fftNormalized(f); } - + double time=seconds(); - + if(n*M < outlimit) { cout << endl << "back to input:" << endl; for(unsigned int i=0; i < n; i++) { @@ -63,10 +63,10 @@ int main() cout << endl << "average seconds: " << time/N << endl; Complex sum=0.0; - for(unsigned int i=0; i < n*M; i++) + for(unsigned int i=0; i < n*M; i++) sum += f[i]; cout << endl << "sum of outputs (used for error-checking): " << sum << endl; - + deleteAlign(f); } diff --git a/examples/example0mr.cc b/examples/example0mr.cc index cb7ad601..30109ac9 100644 --- a/examples/example0mr.cc +++ b/examples/example0mr.cc @@ -7,17 +7,17 @@ using namespace Array; using namespace fftwpp; int main() -{ +{ cout << "Multiple 1D real-to-complex and complex-to-real FFTs" << endl; fftw::maxthreads=get_max_threads(); - + unsigned int nx=4, ny=4; unsigned int nyp=ny/2+1; size_t align=sizeof(Complex); - + cout << "Out-of-place transforms:" << endl; - + array2 f(nx,ny,align); array2 g(nx,nyp,align); size_t rstride=1; @@ -25,7 +25,7 @@ int main() size_t rdist=ny; size_t cdist=nyp; unsigned int M=nx; - + mrcfft1d Forward(ny, // length of transform M, // number of transforms rstride, @@ -44,18 +44,18 @@ int main() f); // output array // Initialize data: - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) f(i,j)=i+j; - + cout << endl << "input:" << endl << f; Forward.fft(f,g); - + cout << endl << "output:" << endl << g; - + Backward.fftNormalized(g,f); - + cout << endl << "back to input:" << endl << f; } diff --git a/examples/example0r.cc b/examples/example0r.cc index b7fe2e9c..f7f240b4 100644 --- a/examples/example0r.cc +++ b/examples/example0r.cc @@ -9,34 +9,34 @@ using namespace fftwpp; int main() { - cout << "1D real to complex out-of-place FFTs not using the Array class" + cout << "1D real to complex out-of-place FFTs not using the Array class" << endl; - + fftw::maxthreads=get_max_threads(); - + unsigned int n=4; unsigned int np=n/2+1; double *f=FFTWdouble(n); Complex *g=FFTWComplex(np); - + rcfft1d Forward(n,f,g); crfft1d Backward(n,g,f); - + for(unsigned int i=0; i < n; i++) f[i]=i; - + cout << "\ninput (" << n << " doubles):" << endl; for(unsigned int i=0; i < n; i++) cout << f[i] << (i!=n-1 ? " " : "\n"); Forward.fft(f,g); - + cout << "\noutput (" << np << " complex):" << endl; for(unsigned int i=0; i < np; i++) cout << g[i] << (i!=np-1 ? " " : "\n"); - + Backward.fftNormalized(g,f); - + cout << "\ntransformed back:" << endl; for(unsigned int i=0; i < n; i++) cout << f[i] << (i!=n-1 ? " " : "\n"); - + deleteAlign(g); deleteAlign(f); } diff --git a/examples/example1.cc b/examples/example1.cc index 60f24ff9..92a047e4 100644 --- a/examples/example1.cc +++ b/examples/example1.cc @@ -17,12 +17,12 @@ int main() unsigned int n=4; size_t align=sizeof(Complex); - + array1 f(n,align); - + fft1d Forward(-1,f); fft1d Backward(1,f); - + for(unsigned int i=0; i < n; i++) f[i]=i; cout << "\ninput:\n" << f << endl; @@ -30,7 +30,7 @@ int main() Forward.fft(f); cout << "\noutput:\n" << f << endl; - + Backward.fftNormalized(f); cout << "\nback to input:\n" << f << endl; diff --git a/examples/example1r.cc b/examples/example1r.cc index e91d9901..acef971f 100644 --- a/examples/example1r.cc +++ b/examples/example1r.cc @@ -12,29 +12,29 @@ using namespace fftwpp; int main() { cout << "1D real to complex out-of-place FFT" << endl; - + fftw::maxthreads=get_max_threads(); - + unsigned int n=4; unsigned int np=n/2+1; size_t align=sizeof(Complex); - + array1 F(np,align); array1 f(n,align); // For out-of-place transforms // array1 f(2*np,(double *) F()); // For in-place transforms - + rcfft1d Forward(n,f,F); crfft1d Backward(n,F,f); - + for(unsigned int i=0; i < n; i++) f[i]=i; cout << endl << "input:" << endl << f << endl; - + Forward.fft(f,F); - + cout << endl << "output:" << endl << F << endl; - + Backward.fftNormalized(F,f); - + cout << endl << "back to input:" << endl << f << endl; } diff --git a/examples/example2.cc b/examples/example2.cc index 434c7c39..93e65823 100644 --- a/examples/example2.cc +++ b/examples/example2.cc @@ -16,22 +16,22 @@ int main() unsigned int nx=4, ny=4; size_t align=sizeof(Complex); - + array2 f(nx,ny,align); - + fft2d Forward(-1,f); fft2d Backward(1,f); - - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) + + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) f(i,j)=Complex(i,j); cout << "\ninput:\n" << f; - + Forward.fft(f); - + cout << "\noutput:\n" << f; - + Backward.fftNormalized(f); cout << "\nback to input:\n" << f; diff --git a/examples/example2r.cc b/examples/example2r.cc index 71f6f349..03e05770 100644 --- a/examples/example2r.cc +++ b/examples/example2r.cc @@ -14,29 +14,29 @@ int main() cout << "2D real to complex out-of-place FFT" << endl; fftw::maxthreads=get_max_threads(); - + unsigned int nx=4, ny=5; unsigned int nyp=ny/2+1; size_t align=sizeof(Complex); - + array2 F(nx,nyp,align); array2 f(nx,ny,align); // For out-of-place transforms // array2 f(nx,2*nyp,(double *) F()); // For in-place transforms - + rcfft2d Forward(nx,ny,f,F); crfft2d Backward(nx,ny,F,f); - - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) + + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) f(i,j)=i+j; - + cout << endl << "input:" << endl << f; Forward.fft0(f,F); - + cout << endl << "output:" << endl << F; - + Backward.fft0Normalized(F,f); - + cout << endl << "back to input:" << endl << f; } diff --git a/examples/example3.cc b/examples/example3.cc index cb06c9ce..0632be7e 100644 --- a/examples/example3.cc +++ b/examples/example3.cc @@ -9,32 +9,32 @@ using namespace utils; using namespace Array; using namespace fftwpp; -int main() +int main() { cout << "3D complex to complex in-place FFT" << endl; fftw::maxthreads=get_max_threads(); - + unsigned int nx=4, ny=4, nz=4; size_t align=sizeof(Complex); - + array3 f(nx,ny,nz,align); - + fft3d Forward(-1,f); fft3d Backward(1,f); - - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) - for(unsigned int k=0; k < nz; k++) + + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) + for(unsigned int k=0; k < nz; k++) f(i,j,k)=Complex(10*k+i,j); - + cout << "\ninput:\n" << f; - + Forward.fft(f); - + cout << "\noutput:\n" << f; - + Backward.fftNormalized(f); - + cout << "\nback to input:\n" << f; } diff --git a/examples/example3r.cc b/examples/example3r.cc index 69d48942..0efa8f19 100644 --- a/examples/example3r.cc +++ b/examples/example3r.cc @@ -14,30 +14,30 @@ int main() cout << "3D real to complex out-of-place FFT" << endl; fftw::maxthreads=get_max_threads(); - + unsigned int nx=4, ny=5, nz=6; unsigned int nzp=nz/2+1; size_t align=sizeof(Complex); - + array3 F(nx,ny,nzp,align); array3 f(nx,ny,nz,align); // For out-of-place transforms // array3 f(nx,ny,2*nzp,(double *) F()); // For in-place transforms rcfft3d Forward(nx,ny,nz,f,F); crfft3d Backward(nx,ny,nz,F,f); - - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) - for(unsigned int k=0; k < nz; k++) + + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) + for(unsigned int k=0; k < nz; k++) f(i,j,k)=i+j+k; - + cout << "\ninput:\n" << f; - + Forward.fft(f,F); - + cout << "\noutput:\n" << F; - + Backward.fftNormalized(F,f); - + cout << "\nback to input:\n" << f; } diff --git a/examples/examplecconv.cc b/examples/examplecconv.cc index ac696a05..38e31d74 100644 --- a/examples/examplecconv.cc +++ b/examples/examplecconv.cc @@ -10,7 +10,7 @@ using namespace utils; using namespace fftwpp; -void init(Complex *f, Complex *g, unsigned int m) +void init(Complex *f, Complex *g, unsigned int m) { for(unsigned int k=0; k < m; k++) { f[k]=Complex(k,k+1); @@ -24,28 +24,28 @@ int main(int argc, char* argv[]) // size of problem unsigned int m=8; - + #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + // allocate arrays: Complex *f=ComplexAlign(m); Complex *g=ComplexAlign(m); - + cout << "1d non-centered complex convolution:" << endl; init(f,g,m); cout << "\ninput:\nf\tg" << endl; - for(unsigned int i=0; i < m; i++) + for(unsigned int i=0; i < m; i++) cout << f[i] << "\t" << g[i] << endl; - + ImplicitConvolution C(m); C.convolve(f,g); cout << "\noutput:" << endl; for(unsigned int i=0; i < m; i++) cout << f[i] << endl; - + deleteAlign(g); deleteAlign(f); diff --git a/examples/examplecconv2.cc b/examples/examplecconv2.cc index e855fbb9..de74efcb 100644 --- a/examples/examplecconv2.cc +++ b/examples/examplecconv2.cc @@ -14,7 +14,7 @@ using namespace fftwpp; unsigned int mx=4; unsigned int my=4; -inline void init(array2& f, array2& g) +inline void init(array2& f, array2& g) { for(unsigned int i=0; i < mx; ++i) { for(unsigned int j=0; j < my; j++) { @@ -30,7 +30,7 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif +#endif cout << "2D non-centered complex convolution:" << endl; diff --git a/examples/examplecconv3.cc b/examples/examplecconv3.cc index 56c766fc..779da8cb 100644 --- a/examples/examplecconv3.cc +++ b/examples/examplecconv3.cc @@ -15,7 +15,7 @@ unsigned int mx=4; unsigned int my=4; unsigned int mz=4; -inline void init(array3& f, array3& g, unsigned int M=1) +inline void init(array3& f, array3& g, unsigned int M=1) { double factor=1.0/sqrt((double) M); for(unsigned int s=0; s < M; ++s) { @@ -40,7 +40,7 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif +#endif cout << "3D non-centered complex convolution:" << endl; diff --git a/examples/examplecconvfull.cc b/examples/examplecconvfull.cc index e3019bf0..e1a7a1f1 100644 --- a/examples/examplecconvfull.cc +++ b/examples/examplecconvfull.cc @@ -11,7 +11,7 @@ using namespace fftwpp; double f0[]={0,1,2,3,4}; -void init(Complex *f, Complex *g, unsigned int m) +void init(Complex *f, Complex *g, unsigned int m) { for(unsigned int k=0; k < m; k++) { f[k]=f0[k]; @@ -26,9 +26,9 @@ void multbinaryfull(Complex **F, unsigned int m, { Complex* F0=F[0]; Complex* F1=F[1]; - + double sign=r == 0 ? 1 : -1; - + PARALLEL( for(unsigned int j=0; j < m; ++j) { Complex *F0j=F0+j; @@ -46,30 +46,30 @@ int main(int argc, char* argv[]) // size of problem unsigned int m=sizeof(f0)/sizeof(double); - + #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + // allocate arrays: Complex *F[2]; Complex *f=F[0]=ComplexAlign(2*m); Complex *g=F[1]=f+m; - + ImplicitConvolution C(m,2,2); cout << "1d non-centered complex convolution:" << endl; init(f,g,m); cout << "\ninput:\nf\tg" << endl; - for(unsigned int i=0; i < m; i++) + for(unsigned int i=0; i < m; i++) cout << f[i] << "\t" << g[i] << endl; - + C.convolve(F,multbinaryfull); cout << "\noutput:" << endl; for(unsigned int i=0; i < m; i++) cout << f[i] << endl; for(unsigned int i=0; i < m-1; i++) cout << g[i] << endl; - + deleteAlign(f); return 0; diff --git a/examples/examplecconvsame.cc b/examples/examplecconvsame.cc index ddb2bfb7..a9461db2 100644 --- a/examples/examplecconvsame.cc +++ b/examples/examplecconvsame.cc @@ -13,7 +13,7 @@ using namespace fftwpp; double f0[]={0,1,2,3,4}; -void init(Complex *f, Complex *g, unsigned int m) +void init(Complex *f, Complex *g, unsigned int m) { for(unsigned int k=0; k < m; k++) { f[k]=f0[k]; @@ -31,7 +31,7 @@ class ZetaTable { s=BuildZeta(twopi*c/(2*m),2*m,ZetaH,ZetaL); } }; - + static void multbinarysame(Complex **F, unsigned int m, const unsigned int indexsize, const unsigned int *index, @@ -42,7 +42,7 @@ static void multbinarysame(Complex **F, unsigned int m, Complex* F0=F[0]; Complex* F1=F[1]; - + unsigned int c=m/2; if(2*c == m) { if(r == 0) { @@ -65,7 +65,7 @@ static void multbinarysame(Complex **F, unsigned int m, sign=-sign; } ); - + } } else { if(m != lastm) { @@ -77,11 +77,11 @@ static void multbinarysame(Complex **F, unsigned int m, list[m]=zeta; } else zeta=p->second; } - + Complex *ZetaH=zeta.ZetaH; Complex *ZetaL=zeta.ZetaL; unsigned int s=zeta.s; - + PARALLEL( for(unsigned int j=0; j < m; ++j) { Complex *F0j=F0+j; @@ -103,29 +103,29 @@ int main(int argc, char* argv[]) // size of problem unsigned int m=sizeof(f0)/sizeof(double); - + #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + // allocate arrays: Complex *F[2]; Complex *f=F[0]=ComplexAlign(2*m); Complex *g=F[1]=f+m; - + ImplicitConvolution C(m); cout << "1d non-centered complex convolution:" << endl; init(f,g,m); cout << "\ninput:\nf\tg" << endl; - for(unsigned int i=0; i < m; i++) + for(unsigned int i=0; i < m; i++) cout << f[i] << "\t" << g[i] << endl; - + C.convolve(F,multbinarysame); cout << "\noutput:" << endl; for(unsigned int i=0; i < m; i++) cout << f[i] << endl; - + deleteAlign(f); return 0; diff --git a/examples/exampleconv.cc b/examples/exampleconv.cc index 4277c851..e490adb9 100644 --- a/examples/exampleconv.cc +++ b/examples/exampleconv.cc @@ -10,7 +10,7 @@ using namespace utils; using namespace Array; using namespace fftwpp; -inline void init(Complex *f, Complex *g, unsigned int m) +inline void init(Complex *f, Complex *g, unsigned int m) { for(unsigned int k=0; k < m; k++) { f[k]=Complex(k,k+1); @@ -27,8 +27,8 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + // 1d centered Hermitian-symmetric complex convolution cout << "1D centered Hermitian-symmetric convolution:" << endl; @@ -38,15 +38,15 @@ int main(int argc, char* argv[]) init(f,g,m); cout << "\ninput:\nf\tg" << endl; - for(unsigned int i=0; i < m; i++) + for(unsigned int i=0; i < m; i++) cout << f[i] << "\t" << g[i] << endl; - + ImplicitHConvolution C(m); C.convolve(f,g); - + cout << "\noutput:" << endl; for(unsigned int i=0; i < m; i++) cout << f[i] << endl; - + deleteAlign(g); deleteAlign(f); diff --git a/examples/exampleconv2.cc b/examples/exampleconv2.cc index 2e1d1514..79b92703 100644 --- a/examples/exampleconv2.cc +++ b/examples/exampleconv2.cc @@ -16,7 +16,7 @@ unsigned int m=8; unsigned int mx=4; unsigned int my=4; -inline void init(array2& f, array2& g, unsigned int M=1) +inline void init(array2& f, array2& g, unsigned int M=1) { unsigned int stop=2*mx-1; unsigned int stopoffset=stop; @@ -41,8 +41,8 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + cout << "2D centered Hermitian-symmetric convolution:" << endl; size_t align=sizeof(Complex); @@ -52,7 +52,7 @@ int main(int argc, char* argv[]) cout << "\ninput:" << endl; cout << "f:" << endl << f; cout << "g:" << endl << g; - + /* cout << "input after symmetrization (done automatically):" << endl; HermitianSymmetrizeX(mx,my,mx-1,f); @@ -60,7 +60,7 @@ int main(int argc, char* argv[]) cout << "f:" << endl << f << endl; cout << "g:" << endl << g << endl; */ - + bool symmetrize=true; ImplicitHConvolution2 C(mx,my); C.convolve(f,g,symmetrize); diff --git a/examples/exampleconv3.cc b/examples/exampleconv3.cc index 09e0f1ae..6882bee0 100644 --- a/examples/exampleconv3.cc +++ b/examples/exampleconv3.cc @@ -15,7 +15,7 @@ unsigned int mx=4; unsigned int my=4; unsigned int mz=4; -inline void init(array3& f, array3& g, unsigned int M=1) +inline void init(array3& f, array3& g, unsigned int M=1) { unsigned int xstop=2*mx-1; unsigned int ystop=2*my-1; @@ -42,8 +42,8 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - +#endif + cout << "3D centered Hermitian-symmetric convolution:" << endl; size_t align=sizeof(Complex); @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) cout << "\ninput:" << endl; cout << "f:" << endl << f; cout << "g:" << endl << g; - + /* cout << "input after symmetrization (done automatically):" << endl; HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,f); @@ -62,7 +62,7 @@ int main(int argc, char* argv[]) cout << "f:" << endl << f << endl; cout << "g:" << endl << g << endl; */ - + bool symmetrize=true; ImplicitHConvolution3 C(mx,my,mz); C.convolve(f,g,symmetrize); diff --git a/examples/exampletranspose.cc b/examples/exampletranspose.cc index 22e984be..0951ffe4 100644 --- a/examples/exampletranspose.cc +++ b/examples/exampletranspose.cc @@ -16,23 +16,23 @@ int main() unsigned int nx=4, ny=4; size_t align=sizeof(Complex); - + array2 f(nx,ny,align); array2 ft(ny,nx,align); - + Transpose T(nx,ny,1,f(),ft(),fftw::maxthreads); Transpose Tinv(ny,nx,1,ft(),f(),fftw::maxthreads); - - for(unsigned int i=0; i < nx; i++) - for(unsigned int j=0; j < ny; j++) + + for(unsigned int i=0; i < nx; i++) + for(unsigned int j=0; j < ny; j++) f(i,j)=Complex(i,j); cout << "\ninput:\n" << f; - + T.transpose(f(),ft()); - + cout << "\noutput:\n" << ft; - + Tinv.transpose(ft(),f()); cout << "\nback to input:\n" << f; diff --git a/fftw++.h b/fftw++.h index 01e1f551..467387d3 100644 --- a/fftw++.h +++ b/fftw++.h @@ -2,7 +2,7 @@ Copyright (C) 2004-16 John C. Bowman, University of Alberta Malcolm Roberts, University of Strasbourg - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or @@ -39,22 +39,22 @@ #include #endif -inline int get_thread_num() +inline int get_thread_num() { #ifdef FFTWPP_SINGLE_THREAD return 0; #else return omp_get_thread_num(); -#endif +#endif } -inline int get_max_threads() +inline int get_max_threads() { #ifdef FFTWPP_SINGLE_THREAD return 1; #else return omp_get_max_threads(); -#endif +#endif } #ifndef FFTWPP_SINGLE_THREAD @@ -69,7 +69,7 @@ inline int get_max_threads() #define PARALLEL(code) \ { \ code \ - } + } #endif #ifndef __Complex_h__ @@ -117,12 +117,12 @@ class ThreadBase protected: unsigned int threads; unsigned int innerthreads; -public: +public: ThreadBase(); ThreadBase(unsigned int threads) : threads(threads) {} void Threads(unsigned int nthreads) {threads=nthreads;} unsigned int Threads() {return threads;} - + void multithread(unsigned int nx) { if(nx >= threads) { innerthreads=1; @@ -137,17 +137,17 @@ inline unsigned int realsize(unsigned int n, Complex *in, Complex *out=NULL) { return (!out || in == out) ? 2*(n/2+1) : n; } - + inline unsigned int realsize(unsigned int n, Complex *in, double *out) { - return realsize(n,in,(Complex *) out); + return realsize(n,in,(Complex *) out); } - + inline unsigned int realsize(unsigned int n, double *in, Complex *out) { - return realsize(n,(Complex *) in,out); + return realsize(n,(Complex *) in,out); } - + // Base clase for fft routines // class fftw : public ThreadBase { @@ -159,24 +159,24 @@ class fftw : public ThreadBase { fftw_plan plan; bool inplace; - + unsigned int Dist(unsigned int n, size_t stride, size_t dist) { return dist ? dist : ((stride == 1) ? n : 1); } - + static const double twopi; - + public: static unsigned int effort; static unsigned int maxthreads; static double testseconds; static const char *WisdomName; static fftw_plan (*planner)(fftw *f, Complex *in, Complex *out); - + virtual unsigned int Threads() {return threads;} - + static const char *oddshift; - + // Inplace shift of Fourier origin to (nx/2,0) for even nx. static void Shift(Complex *data, unsigned int nx, unsigned int ny, unsigned int threads) { @@ -260,41 +260,41 @@ class fftw : public ThreadBase { exit(1); } } - + fftw() : plan(NULL) {} fftw(unsigned int doubles, int sign, unsigned int threads, unsigned int n=0) : - doubles(doubles), sign(sign), threads(threads), + doubles(doubles), sign(sign), threads(threads), norm(1.0/(n ? n : doubles/2)), plan(NULL) { #ifndef FFTWPP_SINGLE_THREAD fftw_init_threads(); -#endif +#endif } - + virtual ~fftw() { if(plan) fftw_destroy_plan(plan); } - + virtual fftw_plan Plan(Complex *in, Complex *out) {return NULL;}; - + inline void CheckAlign(Complex *p, const char *s) { if((size_t) p % sizeof(Complex) == 0) return; - std::cerr << "WARNING: " << s << " array is not " << sizeof(Complex) + std::cerr << "WARNING: " << s << " array is not " << sizeof(Complex) << "-byte aligned: address " << p << std::endl; } - + void noplan() { std::cerr << "Unable to construct FFTW plan" << std::endl; exit(1); } - + static void planThreads(unsigned int threads) { #ifndef FFTWPP_SINGLE_THREAD omp_set_num_threads(threads); fftw_plan_with_nthreads(threads); -#endif +#endif } - + threaddata time(fftw_plan plan1, fftw_plan planT, Complex *in, Complex *out, unsigned int Threads) { utils::statistics S,ST; @@ -339,30 +339,30 @@ class fftw : public ThreadBase { } return threaddata(threads,S.mean(),S.stdev()); } - + virtual threaddata lookup(bool inplace, unsigned int threads) { return threaddata(); } virtual void store(bool inplace, const threaddata& data) {} - + inline Complex *CheckAlign(Complex *in, Complex *out, bool constructor=true) { -#ifndef NO_CHECK_ALIGN +#ifndef NO_CHECK_ALIGN CheckAlign(in,constructor ? "constructor input" : "input"); if(out) CheckAlign(out,constructor ? "constructor output" : "output"); else out=in; #else if(!out) out=in; -#endif +#endif return out; } - + threaddata Setup(Complex *in, Complex *out=NULL) { bool alloc=!in; if(alloc) in=utils::ComplexAlign((doubles+1)/2); out=CheckAlign(in,out); inplace=(out==in); - + threaddata data; unsigned int Threads=threads; if(threads > 1) data=lookup(inplace,threads); @@ -370,13 +370,13 @@ class fftw : public ThreadBase { planThreads(threads); plan=(*planner)(this,in,out); if(!plan) noplan(); - + fftw_plan planT; if(fftw::maxthreads > 1) { threads=Threads; planThreads(threads); planT=(*planner)(this,in,out); - + if(data.threads == 0) { if(planT) data=time(plan,planT,in,out,threads); @@ -384,11 +384,11 @@ class fftw : public ThreadBase { store(inplace,threaddata(threads,data.mean,data.stdev)); } } - + if(alloc) Array::deleteAlign(in,(doubles+1)/2); return data; } - + threaddata Setup(Complex *in, double *out) { return Setup(in,(Complex *) out); } @@ -396,11 +396,11 @@ class fftw : public ThreadBase { threaddata Setup(double *in, Complex *out=NULL) { return Setup((Complex *) in,out); } - + virtual void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out); } - + Complex *Setout(Complex *in, Complex *out) { out=CheckAlign(in,out,false); if(inplace ^ (out == in)) { @@ -409,33 +409,33 @@ class fftw : public ThreadBase { } return out; } - + void fft(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out); } - + void fft(double *in, Complex *out=NULL) { fft((Complex *) in,out); } - + void fft(Complex *in, double *out) { fft(in,(Complex *) out); } - + void fft0(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out,true); } - + void fft0(double *in, Complex *out=NULL) { fft0((Complex *) in,out); } - + void fft0(Complex *in, double *out) { fft0(in,(Complex *) out); } - + void Normalize(Complex *out) { unsigned int stop=doubles/2; #ifndef FFTWPP_SINGLE_THREAD @@ -450,29 +450,29 @@ class fftw : public ThreadBase { #endif for(unsigned int i=0; i < doubles; i++) out[i] *= norm; } - - virtual void fftNormalized(Complex *in, Complex *out=NULL, bool shift=false) + + virtual void fftNormalized(Complex *in, Complex *out=NULL, bool shift=false) { out=Setout(in,out); Execute(in,out,shift); Normalize(out); } - + virtual void fftNormalized(Complex *in, double *out, bool shift=false) { out=(double *) Setout(in,(Complex *) out); Execute(in,(Complex *) out,shift); Normalize(out); } - + virtual void fftNormalized(double *in, Complex *out, bool shift=false) { fftNormalized((Complex *) in,out,shift); } - + template void fft0Normalized(I in, O out) { fftNormalized(in,out,true); } - + template void Normalize(unsigned int nx, unsigned int M, size_t ostride, size_t odist, O *out) { @@ -488,7 +488,7 @@ class fftw : public ThreadBase { } } } - + template void fftNormalized(unsigned int nx, unsigned int M, size_t ostride, size_t odist, I *in, O *out=NULL, bool shift=false) { @@ -508,7 +508,7 @@ class Transpose { T *in, T *out=NULL, unsigned int threads=fftw::maxthreads) { unsigned int size=sizeof(T); if(size % sizeof(double) != 0) { - std::cerr << "ERROR: Transpose is not implemented for type of size " + std::cerr << "ERROR: Transpose is not implemented for type of size " << size; exit(1); } @@ -520,14 +520,14 @@ class Transpose { if(!out) out=in; inplace=(out==in); fftw::planThreads(threads); - + fftw_iodim dims[3]; - dims[0].n=rows; + dims[0].n=rows; dims[0].is=cols*length; dims[0].os=length; - - dims[1].n=cols; + + dims[1].n=cols; dims[1].is=length; dims[1].os=rows*length; @@ -543,7 +543,7 @@ class Transpose { ~Transpose() { if(plan) fftw_destroy_plan(plan); } - + template void transpose(T *in, T *out=NULL) { if(!plan) return; @@ -565,7 +565,7 @@ class Threadtable { typename Table::iterator p=table.find(key); return p == table.end() ? threaddata() : p->second; } - + void Store(Table& threadtable, T key, const threaddata& data) { threadtable[key]=data; } @@ -575,10 +575,10 @@ struct keytype1 { unsigned int nx; unsigned int threads; bool inplace; - keytype1(unsigned int nx, unsigned int threads, bool inplace) : + keytype1(unsigned int nx, unsigned int threads, bool inplace) : nx(nx), threads(threads), inplace(inplace) {} }; - + struct keyless1 { bool operator()(const keytype1& a, const keytype1& b) const { return a.nx < b.nx || (a.nx == b.nx && @@ -593,10 +593,10 @@ struct keytype2 { unsigned int threads; bool inplace; keytype2(unsigned int nx, unsigned int ny, unsigned int threads, - bool inplace) : + bool inplace) : nx(nx), ny(ny), threads(threads), inplace(inplace) {} }; - + struct keyless2 { bool operator()(const keytype2& a, const keytype2& b) const { return a.nx < b.nx || (a.nx == b.nx && @@ -614,13 +614,13 @@ struct keytype3 { unsigned int threads; bool inplace; keytype3(unsigned int nx, unsigned int ny, unsigned int nz, - unsigned int threads, bool inplace) : + unsigned int threads, bool inplace) : nx(nx), ny(ny), nz(nz), threads(threads), inplace(inplace) {} }; - + struct keyless3 { bool operator()(const keytype3& a, const keytype3& b) const { - return a.nx < b.nx || (a.nx == b.nx && + return a.nx < b.nx || (a.nx == b.nx && (a.ny < b.ny || (a.ny == b.ny && (a.nz < b.nz || (a.nz == b.nz && @@ -634,7 +634,7 @@ struct keyless3 { // Before calling fft(), the arrays in and out (which may coincide) must be // allocated as Complex[n]. // -// Out-of-place usage: +// Out-of-place usage: // // fft1d Forward(n,-1,in,out); // Forward.fft(in,out); @@ -656,31 +656,31 @@ struct keyless3 { class fft1d : public fftw, public Threadtable { unsigned int nx; static Table threadtable; -public: +public: fft1d(unsigned int nx, int sign, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads) - : fftw(2*nx,sign,threads), nx(nx) {Setup(in,out);} - + : fftw(2*nx,sign,threads), nx(nx) {Setup(in,out);} + #ifdef __Array_h__ fft1d(int sign, const Array::array1& in, const Array::array1& out=Array::NULL1, - unsigned int threads=maxthreads) - : fftw(2*in.Nx(),sign,threads), nx(in.Nx()) {Setup(in,out);} -#endif - + unsigned int threads=maxthreads) + : fftw(2*in.Nx(),sign,threads), nx(in.Nx()) {Setup(in,out);} +#endif + threaddata lookup(bool inplace, unsigned int threads) { return this->Lookup(threadtable,keytype1(nx,threads,inplace)); } void store(bool inplace, const threaddata& data) { this->Store(threadtable,keytype1(nx,data.threads,inplace),data); } - + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_1d(nx,(fftw_complex *) in,(fftw_complex *) out, sign,effort); } }; - + template class fftwblock : public virtual fftw { public: @@ -699,19 +699,19 @@ class fftwblock : public virtual fftw { T=1; Q=M; R=0; - + threaddata S1=Setup(in,out); fftw_plan planT1=plan; - + if(fftw::maxthreads > 1) { if(Threads > 1) { T=std::min(M,Threads); Q=T > 0 ? M/T : 0; R=M-Q*T; - + threads=Threads; threaddata ST=Setup(in,out); - + if(R > 0 && threads == 1 && plan1 != plan2) { fftw_destroy_plan(plan2); plan2=plan1; @@ -735,23 +735,23 @@ class fftwblock : public virtual fftw { } else Setup(in,out); // Synchronize wisdom } - } - + } + fftw_plan Plan(int Q, fftw_complex *in, fftw_complex *out) { return fftw_plan_many_dft(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,sign,effort); } - + fftw_plan Plan(int Q, double *in, fftw_complex *out) { return fftw_plan_many_dft_r2c(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,effort); } - + fftw_plan Plan(int Q, fftw_complex *in, double *out) { return fftw_plan_many_dft_c2r(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,effort); } - + fftw_plan Plan(Complex *in, Complex *out) { if(R > 0) { plan2=Plan(Q+1,(I *) in,(O *) out); @@ -760,11 +760,11 @@ class fftwblock : public virtual fftw { } return Plan(Q,(I *) in,(O *) out); } - + void Execute(fftw_plan plan, fftw_complex *in, fftw_complex *out) { fftw_execute_dft(plan,in,out); } - + void Execute(fftw_plan plan, double *in, fftw_complex *out) { fftw_execute_dft_r2c(plan,in,out); } @@ -792,20 +792,20 @@ class fftwblock : public virtual fftw { } } } - + unsigned int Threads() {return std::max(T,threads);} - + ~fftwblock() { if(plan2) fftw_destroy_plan(plan2); } }; - + // Compute the complex Fourier transform of M complex vectors, each of // length n. // Before calling fft(), the arrays in and out (which may coincide) must be // allocated as Complex[M*n]. // -// Out-of-place usage: +// Out-of-place usage: // // mfft1d Forward(n,-1,M,stride,dist,in,out); // Forward.fft(in,out); @@ -823,14 +823,14 @@ class fftwblock : public virtual fftw { class mfft1d : public fftwblock, public Threadtable { static Table threadtable; -public: +public: mfft1d(unsigned int nx, int sign, unsigned int M=1, size_t stride=1, size_t dist=0, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads) : fftw(2*((nx-1)*stride+(M-1)*Dist(nx,stride,dist)+1),sign,threads,nx), fftwblock - (nx,M,stride,stride,dist,dist,in,out,threads) {} - + (nx,M,stride,stride,dist,dist,in,out,threads) {} + mfft1d(unsigned int nx, int sign, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads): @@ -838,8 +838,8 @@ class mfft1d : public fftwblock, 2*((nx-1)*ostride+(M-1)*Dist(nx,ostride,odist)+1)),sign, threads, nx), fftwblock(nx,M,istride,ostride,idist,odist,in, - out,threads) {} - + out,threads) {} + threaddata lookup(bool inplace, unsigned int threads) { return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace)); } @@ -847,13 +847,13 @@ class mfft1d : public fftwblock, Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data); } }; - + // Compute the complex Fourier transform of n real values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[n] and // the array out must be allocated as Complex[n/2+1]. The arrays in and out // may coincide, allocated as Complex[n/2+1]. // -// Out-of-place usage: +// Out-of-place usage: // // rcfft1d Forward(n,in,out); // Forward.fft(in,out); @@ -862,7 +862,7 @@ class mfft1d : public fftwblock, // // rcfft1d Forward(n); // Forward.fft(out); -// +// // Notes: // in contains the n real values stored as a Complex array; // out contains the first n/2+1 Complex Fourier values. @@ -870,36 +870,36 @@ class mfft1d : public fftwblock, class rcfft1d : public fftw, public Threadtable { unsigned int nx; static Table threadtable; -public: - rcfft1d(unsigned int nx, Complex *out=NULL, unsigned int threads=maxthreads) +public: + rcfft1d(unsigned int nx, Complex *out=NULL, unsigned int threads=maxthreads) : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(out,(double*) NULL);} - + rcfft1d(unsigned int nx, double *in, Complex *out=NULL, - unsigned int threads=maxthreads) + unsigned int threads=maxthreads) : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(in,out);} - + threaddata lookup(bool inplace, unsigned int threads) { return Lookup(threadtable,keytype1(nx,threads,inplace)); } void store(bool inplace, const threaddata& data) { Store(threadtable,keytype1(nx,data.threads,inplace),data); } - + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_1d(nx,(double *) in,(fftw_complex *) out, effort); } - + void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } }; - + // Compute the real inverse Fourier transform of the n/2+1 Complex values // corresponding to the non-negative part of the frequency spectrum, using // phase sign +1. // Before calling fft(), the array in must be allocated as Complex[n/2+1] // and the array out must be allocated as double[n]. The arrays in and out -// may coincide, allocated as Complex[n/2+1]. +// may coincide, allocated as Complex[n/2+1]. // // Out-of-place usage (input destroyed): // @@ -910,7 +910,7 @@ class rcfft1d : public fftw, public Threadtable { // // crfft1d Backward(n); // Backward.fft(in); -// +// // Notes: // in contains the first n/2+1 Complex Fourier values. // out contains the n real values stored as a Complex array; @@ -918,25 +918,25 @@ class rcfft1d : public fftw, public Threadtable { class crfft1d : public fftw, public Threadtable { unsigned int nx; static Table threadtable; -public: - crfft1d(unsigned int nx, double *out=NULL, unsigned int threads=maxthreads) - : fftw(2*(nx/2+1),1,threads,nx), nx(nx) {Setup(out);} - - crfft1d(unsigned int nx, Complex *in, double *out=NULL, +public: + crfft1d(unsigned int nx, double *out=NULL, unsigned int threads=maxthreads) + : fftw(2*(nx/2+1),1,threads,nx), nx(nx) {Setup(out);} + + crfft1d(unsigned int nx, Complex *in, double *out=NULL, unsigned int threads=maxthreads) - : fftw(realsize(nx,in,out),1,threads,nx), nx(nx) {Setup(in,out);} - + : fftw(realsize(nx,in,out),1,threads,nx), nx(nx) {Setup(in,out);} + threaddata lookup(bool inplace, unsigned int threads) { return Lookup(threadtable,keytype1(nx,threads,inplace)); } void store(bool inplace, const threaddata& data) { Store(threadtable,keytype1(nx,data.threads,inplace),data); } - + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_1d(nx,(fftw_complex *) in,(double *) out,effort); } - + void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); } @@ -948,7 +948,7 @@ class crfft1d : public fftw, public Threadtable { // Complex[M*(n/2+1)]. The arrays in and out may coincide, // allocated as Complex[M*(n/2+1)]. // -// Out-of-place usage: +// Out-of-place usage: // // mrcfft1d Forward(n,M,istride,ostride,idist,odist,in,out); // Forward.fft(in,out); @@ -957,7 +957,7 @@ class crfft1d : public fftw, public Threadtable { // // mrcfft1d Forward(n,M,istride,ostride,idist,odist); // Forward.fft(out); -// +// // Notes: // istride is the spacing between the elements of each real vector; // ostride is the spacing between the elements of each Complex vector; @@ -974,28 +974,28 @@ class mrcfft1d : public fftwblock, size_t istride, size_t ostride, size_t idist, size_t odist, double *in=NULL, Complex *out=NULL, - unsigned int threads=maxthreads) + unsigned int threads=maxthreads) : fftw(std::max((realsize(nx,in,out)-2)*istride+(M-1)*idist+2, 2*(nx/2*ostride+(M-1)*odist+1)),-1,threads,nx), fftwblock (nx,M,istride,ostride,idist,odist,(Complex *) in,out,threads) {} - + threaddata lookup(bool inplace, unsigned int threads) { return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace)); } - + void store(bool inplace, const threaddata& data) { Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data); } - + void Normalize(Complex *out) { fftw::Normalize(nx/2+1,M,ostride,odist,out); } - + void fftNormalized(double *in, Complex *out=NULL, bool shift=false) { fftw::fftNormalized(nx/2+1,M,ostride,odist,in,out,false); } - + void fft0Normalized(double *in, Complex *out=NULL) { fftw::fftNormalized(nx/2+1,M,ostride,odist,in,out,true); } @@ -1006,7 +1006,7 @@ class mrcfft1d : public fftwblock, // spectra, using phase sign +1. Before calling fft(), the array in must be // allocated as Complex[M*(n/2+1)] and the array out must be allocated as // double[M*n]. The arrays in and out may coincide, -// allocated as Complex[M*(n/2+1)]. +// allocated as Complex[M*(n/2+1)]. // // Out-of-place usage (input destroyed): // @@ -1017,7 +1017,7 @@ class mrcfft1d : public fftwblock, // // mcrfft1d Backward(n,M,istride,ostride,idist,odist); // Backward.fft(out); -// +// // Notes: // stride is the spacing between the elements of each Complex vector; // dist is the spacing between the first elements of the vectors; @@ -1030,38 +1030,38 @@ class mcrfft1d : public fftwblock, public: mcrfft1d(unsigned int nx, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, double *out=NULL, - unsigned int threads=maxthreads) + unsigned int threads=maxthreads) : fftw(std::max(2*(nx/2*istride+(M-1)*idist+1), (realsize(nx,in,out)-2)*ostride+(M-1)*odist+2),1,threads,nx), fftwblock (nx,M,istride,ostride,idist,odist,in,(Complex *) out,threads) {} - + threaddata lookup(bool inplace, unsigned int threads) { return Lookup(threadtable,keytype3(nx,Q,R,threads,inplace)); } - + void store(bool inplace, const threaddata& data) { Store(threadtable,keytype3(nx,Q,R,data.threads,inplace),data); } - + void Normalize(double *out) { fftw::Normalize(nx,M,ostride,odist,out); } - + void fftNormalized(Complex *in, double *out=NULL, bool shift=false) { fftw::fftNormalized(nx,M,ostride,odist,in,out,false); } - + void fft0Normalized(Complex *in, double *out=NULL) { fftw::fftNormalized(nx,M,ostride,odist,in,out,true); } }; - + // Compute the complex two-dimensional Fourier transform of nx times ny // complex values. Before calling fft(), the arrays in and out (which may // coincide) must be allocated as Complex[nx*ny]. // -// Out-of-place usage: +// Out-of-place usage: // // fft2d Forward(nx,ny,-1,in,out); // Forward.fft(in,out); @@ -1087,32 +1087,32 @@ class fft2d : public fftw, public Threadtable { unsigned int nx; unsigned int ny; static Table threadtable; -public: +public: fft2d(unsigned int nx, unsigned int ny, int sign, Complex *in=NULL, - Complex *out=NULL, unsigned int threads=maxthreads) - : fftw(2*nx*ny,sign,threads), nx(nx), ny(ny) {Setup(in,out);} - + Complex *out=NULL, unsigned int threads=maxthreads) + : fftw(2*nx*ny,sign,threads), nx(nx), ny(ny) {Setup(in,out);} + #ifdef __Array_h__ fft2d(int sign, const Array::array2& in, - const Array::array2& out=Array::NULL2, - unsigned int threads=maxthreads) + const Array::array2& out=Array::NULL2, + unsigned int threads=maxthreads) : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()) { Setup(in,out); } -#endif - +#endif + threaddata lookup(bool inplace, unsigned int threads) { return this->Lookup(threadtable,keytype2(nx,ny,threads,inplace)); } void store(bool inplace, const threaddata& data) { this->Store(threadtable,keytype2(nx,ny,data.threads,inplace),data); } - + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_2d(nx,ny,(fftw_complex *) in,(fftw_complex *) out, sign,effort); } - + void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out); } @@ -1122,9 +1122,9 @@ class fft2d : public fftw, public Threadtable { // values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[nx*ny] and // the array out must be allocated as Complex[nx*(ny/2+1)]. The arrays in -// and out may coincide, allocated as Complex[nx*(ny/2+1)]. +// and out may coincide, allocated as Complex[nx*(ny/2+1)]. // -// Out-of-place usage: +// Out-of-place usage: // // rcfft2d Forward(nx,ny,in,out); // Forward.fft(in,out); // Origin of Fourier domain at (0,0) @@ -1136,7 +1136,7 @@ class fft2d : public fftw, public Threadtable { // rcfft2d Forward(nx,ny); // Forward.fft(in); // Origin of Fourier domain at (0,0) // Forward.fft0(in); // Origin of Fourier domain at (nx/2,0) -// +// // Notes: // in contains the nx*ny real values stored as a Complex array; // out contains the upper-half portion (ky >= 0) of the Complex transform. @@ -1144,22 +1144,22 @@ class fft2d : public fftw, public Threadtable { class rcfft2d : public fftw { unsigned int nx; unsigned int ny; -public: +public: rcfft2d(unsigned int nx, unsigned int ny, Complex *out=NULL, - unsigned int threads=maxthreads) - : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} - + unsigned int threads=maxthreads) + : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} + rcfft2d(unsigned int nx, unsigned int ny, double *in, Complex *out=NULL, - unsigned int threads=maxthreads) + unsigned int threads=maxthreads) : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) { Setup(in,out); - } - + } + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_2d(nx,ny,(double *) in,(fftw_complex *) out, effort); } - + void Execute(Complex *in, Complex *out, bool shift=false) { if(shift) { if(inplace) Shift(in,nx,ny,threads); @@ -1167,7 +1167,7 @@ class rcfft2d : public fftw { } fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } - + // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { unsigned int nyp=ny/2+1; @@ -1185,14 +1185,14 @@ class rcfft2d : public fftw { f[(i+1)*nyp-1]=0.0; } }; - + // Compute the real two-dimensional inverse Fourier transform of the // nx*(ny/2+1) Complex values corresponding to the spectral values in the // half-plane ky >= 0, using phase sign +1. // Before calling fft(), the array in must be allocated as // Complex[nx*(ny/2+1)] and the array out must be allocated as // double[nx*ny]. The arrays in and out may coincide, -// allocated as Complex[nx*(ny/2+1)]. +// allocated as Complex[nx*(ny/2+1)]. // // Out-of-place usage (input destroyed): // @@ -1205,7 +1205,7 @@ class rcfft2d : public fftw { // crfft2d Backward(nx,ny); // Backward.fft(in); // Origin of Fourier domain at (0,0) // Backward.fft0(in); // Origin of Fourier domain at (nx/2,0) -// +// // Notes: // in contains the upper-half portion (ky >= 0) of the Complex transform; // out contains the nx*ny real values stored as a Complex array. @@ -1213,22 +1213,22 @@ class rcfft2d : public fftw { class crfft2d : public fftw { unsigned int nx; unsigned int ny; -public: +public: crfft2d(unsigned int nx, unsigned int ny, double *out=NULL, unsigned int threads=maxthreads) : - fftw(2*nx*(ny/2+1),1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} - + fftw(2*nx*(ny/2+1),1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} + crfft2d(unsigned int nx, unsigned int ny, Complex *in, double *out=NULL, unsigned int threads=maxthreads) : fftw(nx*realsize(ny,in,out),1,threads,nx*ny), nx(nx), ny(ny) { Setup(in,out); - } - + } + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_2d(nx,ny,(fftw_complex *) in,(double *) out, effort); } - + void Execute(Complex *in, Complex *out, bool shift=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); if(shift) { @@ -1236,7 +1236,7 @@ class crfft2d : public fftw { else Shift((double *) out,nx,ny,threads); } } - + // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { unsigned int nyp=ny/2+1; @@ -1255,11 +1255,11 @@ class crfft2d : public fftw { } }; -// Compute the complex three-dimensional Fourier transform of +// Compute the complex three-dimensional Fourier transform of // nx times ny times nz complex values. Before calling fft(), the arrays in // and out (which may coincide) must be allocated as Complex[nx*ny*nz]. // -// Out-of-place usage: +// Out-of-place usage: // // fft3d Forward(nx,ny,nz,-1,in,out); // Forward.fft(in,out); @@ -1286,20 +1286,20 @@ class fft3d : public fftw { unsigned int nx; unsigned int ny; unsigned int nz; -public: +public: fft3d(unsigned int nx, unsigned int ny, unsigned int nz, int sign, Complex *in=NULL, Complex *out=NULL, - unsigned int threads=maxthreads) - : fftw(2*nx*ny*nz,sign,threads), nx(nx), ny(ny), nz(nz) {Setup(in,out);} - + unsigned int threads=maxthreads) + : fftw(2*nx*ny*nz,sign,threads), nx(nx), ny(ny), nz(nz) {Setup(in,out);} + #ifdef __Array_h__ fft3d(int sign, const Array::array3& in, const Array::array3& out=Array::NULL3, - unsigned int threads=maxthreads) - : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()), nz(in.Nz()) + unsigned int threads=maxthreads) + : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()), nz(in.Nz()) {Setup(in,out);} -#endif - +#endif + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) in, (fftw_complex *) out, sign, effort); @@ -1310,9 +1310,9 @@ class fft3d : public fftw { // nx times ny times nz real values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[nx*ny*nz] // and the array out must be allocated as Complex[nx*ny*(nz/2+1)]. The -// arrays in and out may coincide, allocated as Complex[nx*ny*(nz/2+1)]. +// arrays in and out may coincide, allocated as Complex[nx*ny*(nz/2+1)]. // -// Out-of-place usage: +// Out-of-place usage: // // rcfft3d Forward(nx,ny,nz,in,out); // Forward.fft(in,out); // Origin of Fourier domain at (0,0) @@ -1323,7 +1323,7 @@ class fft3d : public fftw { // rcfft3d Forward(nx,ny,nz); // Forward.fft(in); // Origin of Fourier domain at (0,0) // Forward.fft0(in); // Origin of Fourier domain at (nx/2,ny/2,0) -// +// // Notes: // in contains the nx*ny*nz real values stored as a Complex array; // out contains the upper-half portion (kz >= 0) of the Complex transform. @@ -1332,23 +1332,23 @@ class rcfft3d : public fftw { unsigned int nx; unsigned int ny; unsigned int nz; -public: +public: rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *out=NULL, unsigned int threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) { Setup(out); - } - + } + rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *in, - Complex *out=NULL, unsigned int threads=maxthreads) + Complex *out=NULL, unsigned int threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz), - nx(nx), ny(ny), nz(nz) {Setup(in,out);} - + nx(nx), ny(ny), nz(nz) {Setup(in,out);} + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_3d(nx,ny,nz,(double *) in,(fftw_complex *) out, effort); } - + void Execute(Complex *in, Complex *out, bool shift=false) { if(shift) { if(inplace) Shift(in,nx,ny,nz,threads); @@ -1356,7 +1356,7 @@ class rcfft3d : public fftw { } fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } - + // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { unsigned int nzp=nz/2+1; @@ -1368,7 +1368,7 @@ class rcfft3d : public fftw { for(unsigned int k=0; k < yz; ++k) f[k]=0.0; } - + if(ny % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) @@ -1379,7 +1379,7 @@ class rcfft3d : public fftw { f[iyz+k]=0.0; } } - + if(nz % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) @@ -1389,14 +1389,14 @@ class rcfft3d : public fftw { f[i*yz+(j+1)*nzp-1]=0.0; } }; - + // Compute the real two-dimensional inverse Fourier transform of the // nx*ny*(nz/2+1) Complex values corresponding to the spectral values in the // half-plane kz >= 0, using phase sign +1. // Before calling fft(), the array in must be allocated as // Complex[nx*ny*(nz+1)/2] and the array out must be allocated as // double[nx*ny*nz]. The arrays in and out may coincide, -// allocated as Complex[nx*ny*(nz/2+1)]. +// allocated as Complex[nx*ny*(nz/2+1)]. // // Out-of-place usage (input destroyed): // @@ -1409,7 +1409,7 @@ class rcfft3d : public fftw { // crfft3d Backward(nx,ny,nz); // Backward.fft(in); // Origin of Fourier domain at (0,0) // Backward.fft0(in); // Origin of Fourier domain at (nx/2,ny/2,0) -// +// // Notes: // in contains the upper-half portion (kz >= 0) of the Complex transform; // out contains the nx*ny*nz real values stored as a Complex array. @@ -1418,22 +1418,22 @@ class crfft3d : public fftw { unsigned int nx; unsigned int ny; unsigned int nz; -public: +public: crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *out=NULL, - unsigned int threads=maxthreads) + unsigned int threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) - {Setup(out);} - + {Setup(out);} + crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *in, - double *out=NULL, unsigned int threads=maxthreads) + double *out=NULL, unsigned int threads=maxthreads) : fftw(nx*ny*(realsize(nz,in,out)),1,threads,nx*ny*nz), nx(nx), ny(ny), - nz(nz) {Setup(in,out);} - + nz(nz) {Setup(in,out);} + fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_3d(nx,ny,nz,(fftw_complex *) in,(double *) out, effort); } - + void Execute(Complex *in, Complex *out, bool shift=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); if(shift) { @@ -1441,7 +1441,7 @@ class crfft3d : public fftw { else Shift((double *) out,nx,ny,nz,threads); } } - + // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { unsigned int nzp=nz/2+1; @@ -1453,7 +1453,7 @@ class crfft3d : public fftw { for(unsigned int k=0; k < yz; ++k) f[k]=0.0; } - + if(ny % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) @@ -1464,7 +1464,7 @@ class crfft3d : public fftw { f[iyz+k]=0.0; } } - + if(nz % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) diff --git a/seconds.h b/seconds.h index 4f157b8f..0b92e902 100644 --- a/seconds.h +++ b/seconds.h @@ -13,48 +13,48 @@ #else #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL #endif - + struct timezone { int tz_minuteswest; /* minutes W of Greenwich */ int tz_dsttime; /* type of dst correction */ }; - + // Definition of a gettimeofday function - + inline int gettimeofday(struct timeval *tv, struct timezone *tz) { // Define a structure to receive the current Windows filetime FILETIME ft; - + // Initialize the present time to 0 and the timezone to UTC unsigned __int64 tmpres = 0; static int tzflag = 0; - + if (NULL != tv) { GetSystemTimeAsFileTime(&ft); - -// The GetSystemTimeAsFileTime returns the number of 100 nanosecond -// intervals since Jan 1, 1601 in a structure. Copy the high bits to + +// The GetSystemTimeAsFileTime returns the number of 100 nanosecond +// intervals since Jan 1, 1601 in a structure. Copy the high bits to // the 64 bit tmpres, shift it left by 32 then or in the low 32 bits. tmpres |= ft.dwHighDateTime; tmpres <<= 32; tmpres |= ft.dwLowDateTime; - + // Convert to microseconds by dividing by 10 tmpres /= 10; - -// The Unix epoch starts on Jan 1 1970. Need to subtract the difference + +// The Unix epoch starts on Jan 1 1970. Need to subtract the difference // in seconds from Jan 1 1601. tmpres -= DELTA_EPOCH_IN_MICROSECS; - -// Finally change microseconds to seconds and place in the seconds value. + +// Finally change microseconds to seconds and place in the seconds value. // The modulus picks up the microseconds. tv->tv_sec = (long)(tmpres / 1000000UL); tv->tv_usec = (long)(tmpres % 1000000UL); } - + if (NULL != tz) { if (!tzflag) @@ -62,12 +62,12 @@ inline int gettimeofday(struct timeval *tv, struct timezone *tz) _tzset(); tzflag++; } - + // Adjust for the timezone west of Greenwich tz->tz_minuteswest = _timezone / 60; tz->tz_dsttime = _daylight; } - + return 0; } diff --git a/statistics.h b/statistics.h index 56394d64..196c60c6 100644 --- a/statistics.h +++ b/statistics.h @@ -9,7 +9,7 @@ class statistics { double varL; double varH; public: - statistics() : N(0), A(0.0), varL(0.0), varH(0.0) {} + statistics() : N(0), A(0.0), varL(0.0), varH(0.0) {} double count() {return N;} double mean() {return A;} void add(double t) { @@ -36,14 +36,14 @@ class statistics { return stdev(varH,2.0); } void output(const char *text, unsigned int m) { - std::cout << text << ":\n" - << m << "\t" - << A << "\t" - << stdevL() << "\t" + std::cout << text << ":\n" + << m << "\t" + << A << "\t" + << stdevL() << "\t" << stdevH() << std::endl; } }; - + } #endif diff --git a/tests/cconv.cc b/tests/cconv.cc index 2a92da5b..9b397bc6 100644 --- a/tests/cconv.cc +++ b/tests/cconv.cc @@ -18,7 +18,7 @@ bool Test=false; unsigned int A=2; // number of inputs unsigned int B=1; // number of outputs -inline void init(Complex **F, unsigned int m, unsigned int A) +inline void init(Complex **F, unsigned int m, unsigned int A) { if(A % 2 == 0) { unsigned int M=A/2; @@ -30,14 +30,14 @@ inline void init(Complex **F, unsigned int m, unsigned int A) Complex *gs=F[s+M]; if(Test) { for(unsigned int k=0; k < m; k++) { - fs[k]=factor*iF*pow(E,k*I); - gs[k]=factor*iG*pow(E,k*I); - } + fs[k]=factor*iF*pow(E,k*I); + gs[k]=factor*iG*pow(E,k*I); + } } else { for(unsigned int k=0; k < m; k++) { - fs[k]=ffactor*Complex(k,k+1); - gs[k]=gfactor*Complex(k,2*k+1); - } + fs[k]=ffactor*Complex(k,k+1); + gs[k]=gfactor*Complex(k,2*k+1); + } } } } else { @@ -60,7 +60,7 @@ void multA(Complex **F, unsigned int m, case 2: multbinary(F,m,indexsize,index,r,threads); break; case 4: multbinary2(F,m,indexsize,index,r,threads); break; default: - cerr << "A=" << A << " is not yet implemented" << endl; + cerr << "A=" << A << " is not yet implemented" << endl; exit(1); } @@ -79,26 +79,26 @@ int main(int argc, char* argv[]) bool Direct=false; bool Implicit=true; bool Explicit=false; - + // Number of iterations. unsigned int N0=1000000000; unsigned int N=0; - + unsigned int m=11; // Problem size - + int stats=0; // Type of statistics used in timing test. #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeiptA:B::N:m:n:S:T:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -151,7 +151,7 @@ int main(int argc, char* argv[]) cout << "n=" << n << endl; cout << "m=" << m << endl; - + if(N == 0) { N=N0/n; N = max(N, 20); @@ -164,21 +164,21 @@ int main(int argc, char* argv[]) if(B < 1) B=1; - + unsigned int np=Explicit ? n : m; unsigned int C=max(A,B); Complex *f=ComplexAlign(C*np); - + Complex **F=new Complex *[C]; for(unsigned int s=0; s < C; ++s) F[s]=f+s*np; - + Complex *h0=NULL; if(Test || Direct) h0=ComplexAlign(m*B); double *T=new double[N]; - + if(Implicit) { ImplicitConvolution C(m,A,B); cout << "threads=" << C.Threads() << endl << endl; @@ -186,22 +186,22 @@ int main(int argc, char* argv[]) multiplier *mult=NULL; switch(B) { case 1: - switch(A) { - case 1: mult=multautoconvolution; break; - case 2: mult=multbinary; break; - case 4: mult=multbinary2; break; - case 6: mult=multbinary3; break; - case 8: mult=multbinary4; break; - case 16: mult=multbinary8; break; - default: - cerr << "A=" << A << ", B=" << B << " is not yet implemented" - << endl; - exit(1); - } - break; + switch(A) { + case 1: mult=multautoconvolution; break; + case 2: mult=multbinary; break; + case 4: mult=multbinary2; break; + case 6: mult=multbinary3; break; + case 8: mult=multbinary4; break; + case 16: mult=multbinary8; break; + default: + cerr << "A=" << A << ", B=" << B << " is not yet implemented" + << endl; + exit(1); + } + break; default: - mult=multA; - break; + mult=multA; + break; } if(!mult) { cerr << "A=" << A << ", B=" << B << " is not yet implemented" @@ -221,24 +221,24 @@ int main(int argc, char* argv[]) if(m < 100) { for(unsigned int b=0; b < B; ++b) { - for(unsigned int i=0; i < m; i++) - cout << F[b][i] << endl; - cout << endl; + for(unsigned int i=0; i < m; i++) + cout << F[b][i] << endl; + cout << endl; } } else { cout << f[0] << endl; } - + if(Test || Direct) { for(unsigned int b=0; b < B; ++b) { - for(unsigned int i=0; i < m; i++) { - h0[i+b*m]=F[b][i]; - } + for(unsigned int i=0; i < m; i++) { + h0[i+b*m]=F[b][i]; + } } } } - + if(Explicit) { ExplicitConvolution C(n,m,F[0]); for(unsigned int i=0; i < N; ++i) { @@ -251,16 +251,16 @@ int main(int argc, char* argv[]) cout << endl; timings("Explicit",m,T,N,stats); - if(m < 100) + if(m < 100) for(unsigned int i=0; i < m; i++) cout << F[0][i] << endl; else cout << F[0][0] << endl; cout << endl; if(Test || Direct) for(unsigned int i=0; i < m; i++) - h0[i]=F[0][i]; + h0[i]=F[0][i]; } - + if(Direct) { DirectConvolution C(m); if(A % 2 == 0 && A > 2) @@ -273,13 +273,13 @@ int main(int argc, char* argv[]) if(A == 1) C.autoconvolve(h,F[0]); T[0]=seconds(); - + cout << endl; timings("Direct",m,T,1); if(m < 100) for(unsigned int i=0; i < m; i++) - cout << h[i] << endl; + cout << h[i] << endl; else cout << h[0] << endl; @@ -287,7 +287,7 @@ int main(int argc, char* argv[]) double error=0.0; double norm=0.0; for(unsigned int b=0; b < B; ++b) { - double factor=1.0+b; + double factor=1.0+b; for(unsigned long long k=0; k < m; k++) { error += abs2(h0[k+b*m]-factor*h[k]); norm += abs2(h[k]); @@ -296,15 +296,15 @@ int main(int argc, char* argv[]) if(norm > 0) error=sqrt(error/norm); cout << "error=" << error << endl; if (error > 1e-12) - cerr << "Caution! error=" << error << endl; + cerr << "Caution! error=" << error << endl; } - + if(Test) for(unsigned int i=0; i < m; i++) - h0[i]=h[i]; + h0[i]=h[i]; deleteAlign(h); } - + if(Test) { Complex *h=ComplexAlign(n*B); // test accuracy of convolution methods: @@ -329,11 +329,11 @@ int main(int argc, char* argv[]) for(unsigned long long k=0; k < m; k++) h[k]=k*(0.5*k*(k+1)) - k*(k+1)*(2*k+1)/6.0; } - + if(!testok) { cout << "ERROR: no test case for A="< 1e-12) cerr << "Caution! error=" << error << endl; } } - + delete [] T; for(unsigned int a=0; a < A; ++a) deleteAlign(F[a]); delete [] F; - + return 0; } diff --git a/tests/cconv3.cc b/tests/cconv3.cc index 0dd5ec94..8234da1a 100644 --- a/tests/cconv3.cc +++ b/tests/cconv3.cc @@ -22,9 +22,9 @@ unsigned int mz=4; bool Direct=false, Implicit=true, Explicit=false, Pruned=false; -inline void init(Complex **F, - unsigned int nxp, unsigned int nyp, unsigned int nzp, - unsigned int A) +inline void init(Complex **F, + unsigned int nxp, unsigned int nyp, unsigned int nzp, + unsigned int A) { if(A %2 == 0) { unsigned int M=A/2; @@ -62,20 +62,20 @@ int main(int argc, char* argv[]) unsigned int A=2; // Number of independent inputs unsigned int B=1; // Number of independent outputs - + unsigned int stats=0; // Type of statistics used in timing test. #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeiptA:B:N:m:x:y:z:n:T:S:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -134,27 +134,27 @@ int main(int argc, char* argv[]) } } - + if(my == 0) my=mx; nx=cpadding(mx); ny=cpadding(my); nz=cpadding(mz); - + cout << "nx=" << nx << ", ny=" << ny << ", nz=" << ny << endl; cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl; - + if(N == 0) { N=N0/nx/ny/nz; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); - + array3 h0; if(Direct) h0.Allocate(mx,my,mz,align); - + int nxp=Explicit ? nx : mx; int nyp=Explicit ? ny : my; int nzp=Explicit ? nz : mz; @@ -167,7 +167,7 @@ int main(int argc, char* argv[]) cerr << "B=" << B << " is not yet implemented for A=" << A << endl; exit(1); } - + // Allocate input/ouput memory and set up pointers Complex **F=new Complex *[A]; for(unsigned int a=0; a < A; ++a) @@ -177,17 +177,17 @@ int main(int argc, char* argv[]) array3 f(mx,my,mz,F[0]); double *T=new double[N]; - + if(Implicit) { multiplier *mult; - + switch(A) { case 2: mult=multbinary; break; case 4: mult=multbinary2; break; case 6: mult=multbinary3; break; case 8: mult=multbinary4; break; case 16: mult=multbinary8; break; - default: cout << "mult for A=" << A + default: cout << "mult for A=" << A << " is not yet implemented" << endl; exit(1); } @@ -200,23 +200,23 @@ int main(int argc, char* argv[]) // C.convolve(F[0],F[1]); T[i]=seconds(); } - + cout << endl; timings("Implicit",mx,T,N,stats); - + if(Direct) - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) for(unsigned int k=0; k < mz; k++) h0[i][j][k]=f[i][j][k]; cout << endl; - if(mx*my*mz < outlimit) + if(mx*my*mz < outlimit) cout << f; - else + else cout << f[0][0][0] << endl; } - + if(Explicit) { if(A != 2) { cerr << "Explicit convolutions for A=" << A @@ -232,12 +232,12 @@ int main(int argc, char* argv[]) C.convolve(F[0],F[1]); T[i]=seconds(); } - + cout << endl; timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats); if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) for(unsigned int k=0; k < mz; k++) h0[i][j][k]=F[0][nyp*nzp*i+nzp*j+k]; @@ -252,7 +252,7 @@ int main(int argc, char* argv[]) } cout << endl; } - } else { + } else { cout << F[0][0] << endl; } } @@ -264,7 +264,7 @@ int main(int argc, char* argv[]) seconds(); C.convolve(h,F[0],F[1]); T[0]=seconds(); - + timings("Direct",mx,T,1); if(mx*my*mz < outlimit) { @@ -291,12 +291,12 @@ int main(int argc, char* argv[]) } if(norm > 0) error=sqrt(error/norm); cout << "error=" << error << endl; - if (error > 1e-12) + if (error > 1e-12) cerr << "Caution! error=" << error << endl; } } - + delete [] T; for(unsigned int a=0; a < A; ++a) deleteAlign(F[a]); diff --git a/tests/conv.cc b/tests/conv.cc index 2ab02e25..f21671fa 100644 --- a/tests/conv.cc +++ b/tests/conv.cc @@ -24,7 +24,7 @@ void multA(double **F, unsigned int m, case 2: multbinary(F,m,indexsize,index,r,threads); break; case 4: multbinary2(F,m,indexsize,index,r,threads); break; default: - cerr << "A=" << A << " is not yet implemented" << endl; + cerr << "A=" << A << " is not yet implemented" << endl; exit(1); } @@ -36,19 +36,19 @@ void multA(double **F, unsigned int m, } } -inline void init(Complex **F, unsigned int m, unsigned int A) +inline void init(Complex **F, unsigned int m, unsigned int A) { - if(!compact) + if(!compact) for(unsigned int i=0; i < A; ++i) F[i][m]=0.0; - + const Complex I(0.0,1.0); const double E=exp(1.0); const double iF=sqrt(3.0); const double iG=sqrt(5.0); - + if(A % 2 != 0 && A != 1) { - cerr << "A=" << A << " is not yet implemented" << endl; + cerr << "A=" << A << " is not yet implemented" << endl; exit(1); } if(A == 1) { @@ -82,7 +82,7 @@ inline void init(Complex **F, unsigned int m, unsigned int A) void test(unsigned int m, Complex *h0) { - + Complex *h=ComplexAlign(m); double error=0.0; cout << endl; @@ -93,7 +93,7 @@ void test(unsigned int m, Complex *h0) const double E=exp(1.0); const double F=sqrt(3.0); const double G=sqrt(5.0); - + for(long long k=0; k < mm; k++) { h[k]=F*G*(2*mm-1-k)*pow(E,k*I); // h[k]=F*G*(4*m*m*m-6*(k+1)*m*m+(6*k+2)*m+3*k*k*k-3*k)/6.0; @@ -115,21 +115,21 @@ int main(int argc, char* argv[]) unsigned int N=0; // Number of iterations. unsigned int N0=10000000; // Nominal number of iterations unsigned int m=11; // Problem size - + int stats=0; // Type of statistics used in timing test. #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeiptA:B:N:m:n:T:S:X:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -185,26 +185,26 @@ int main(int argc, char* argv[]) } unsigned int n=hpadding(m); - + cout << "n=" << n << endl; cout << "m=" << m << endl; - + if(N == 0) { N=N0/n; N = max(N, 20); } cout << "N=" << N << endl; - + unsigned int np=Explicit ? n/2+1 : m+!compact; // explicit and direct convolutions are only implemented for binary // convolutions. - if(!Implicit) + if(!Implicit) A=2; - + if(B < 1) B=1; - + unsigned int C=max(A,B); Complex *f=ComplexAlign(C*np); Complex **F=new Complex *[C]; @@ -222,10 +222,10 @@ int main(int argc, char* argv[]) cout << "threads=" << C.Threads() << endl << endl; if (A % 2 != 0) { - cerr << "A=" << A << " is not yet implemented" << endl; + cerr << "A=" << A << " is not yet implemented" << endl; exit(1); } - + realmultiplier *mult=0; if(B == 1) { switch(A) { @@ -235,7 +235,7 @@ int main(int argc, char* argv[]) } } else mult=multA; - + for(unsigned int i=0; i < N; ++i) { init(F,m,A); seconds(); @@ -246,26 +246,26 @@ int main(int argc, char* argv[]) timings("Implicit",m,T,N,stats); - + if(m < 100) { for(unsigned int b=0; b < B; ++b) { - for(unsigned int i=0; i < m; i++) - cout << F[b][i] << endl; - cout << endl; + for(unsigned int i=0; i < m; i++) + cout << F[b][i] << endl; + cout << endl; } } else { cout << f[0] << endl; } - + if(Test || Direct) { for(unsigned int b=0; b 0) - error=sqrt(error/norm); + error=sqrt(error/norm); cout << "error=" << error << endl; if (error > 1e-12) - cerr << "Caution! error=" << error << endl; + cerr << "Caution! error=" << error << endl; } - if(Test) + if(Test) for(unsigned int i=0; i < m; i++) - h0[i]=h[i]; + h0[i]=h[i]; deleteAlign(h); } - if(Test) + if(Test) test(m,h0); - + delete [] T; deleteAlign(f); delete [] F; - + return 0; } diff --git a/tests/conv2.cc b/tests/conv2.cc index 4ebbeac6..65d5ae3b 100644 --- a/tests/conv2.cc +++ b/tests/conv2.cc @@ -83,14 +83,14 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; #endif - -#ifdef __GNUC__ + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeipA:B:N:m:x:y:n:T:S:X:Y:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -155,16 +155,16 @@ int main(int argc, char* argv[]) nx=hpadding(mx); ny=hpadding(my); - + cout << "nx=" << nx << ", ny=" << ny << endl; cout << "mx=" << mx << ", my=" << my << endl; - + if(N == 0) { N=N0/nx/ny; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); array2 h0; @@ -175,13 +175,13 @@ int main(int argc, char* argv[]) cout << "nxp=" << nxp << ", nyp=" << nyp << endl; - + if(B < 1) B=1; if(B > A) { cerr << "B=" << B << " is not yet implemented for A=" << A << endl; exit(1); } - + Complex **F=new Complex *[A]; for(unsigned int a=0; a < A; ++a) F[a]=ComplexAlign(nxp*nyp); @@ -201,8 +201,8 @@ int main(int argc, char* argv[]) case 4: mult=multbinary2; break; default: cerr << "A=" << A << " is not yet implemented" << endl; exit(1); } - - + + for(unsigned int i=0; i < N; ++i) { init(F,mx,my,nxp,nyp,A,xcompact,ycompact); seconds(); @@ -210,28 +210,28 @@ int main(int argc, char* argv[]) // C.convolve(f,g); T[i]=seconds(); } - + timings("Implicit",mx,T,N,stats); if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) h0[i][j]=f[i+!xcompact][j]; } - + if(nxp*my < outlimit) for(unsigned int i=!xcompact; i < nxp; i++) { for(unsigned int j=0; j < my; j++) cout << f[i][j] << "\t"; cout << endl; } else cout << f[!xcompact][0] << endl; - + } - + if(Explicit) { unsigned int M=A/2; ExplicitHConvolution2 C(nx,ny,mx,my,f,M,Pruned); - + for(unsigned int i=0; i < N; ++i) { init(F,mx,my,nxp,nyp,A,true,true); seconds(); @@ -243,14 +243,14 @@ int main(int argc, char* argv[]) timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats); unsigned int offset=nx/2-mx+1; - + if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) h0[i][j]=f[offset+i][j]; } - if(2*(mx-1)*my < outlimit) { + if(2*(mx-1)*my < outlimit) { for(unsigned int i=offset; i < offset+2*mx-1; i++) { for(unsigned int j=0; j < my; j++) cout << f[i][j] << "\t"; @@ -260,7 +260,7 @@ int main(int argc, char* argv[]) cout << f[offset][0] << endl; } } - + if(Direct) { unsigned int nxp=2*mx-1; array2 h(nxp,my,align); @@ -269,7 +269,7 @@ int main(int argc, char* argv[]) seconds(); C.convolve(h,F[0],F[1]); T[0]=seconds(); - + cout << endl; timings("Direct",mx,T,1); @@ -295,12 +295,12 @@ int main(int argc, char* argv[]) if (error > 1e-12) cerr << "Caution! error=" << error << endl; } } - + delete [] T; for(unsigned int a=0; a < A; ++a) deleteAlign(F[a]); delete [] F; - - + + return 0; } diff --git a/tests/conv3.cc b/tests/conv3.cc index a615abf0..b6913b5a 100644 --- a/tests/conv3.cc +++ b/tests/conv3.cc @@ -38,7 +38,7 @@ inline void init(Complex **F, unsigned int M=A/2; unsigned int nx=2*mx-1; unsigned int ny=2*my-1; - + double factor=1.0/sqrt((double) M); for(unsigned int s=0; s < M; ++s) { double S=sqrt(1.0+s); @@ -121,7 +121,7 @@ inline void init(array3& f, array3& g, unsigned int M=1, int main(int argc, char* argv[]) { fftw::maxthreads=get_max_threads(); - + unsigned int A=2; // Number of independent inputs unsigned int B=1; // Number of outputs @@ -129,15 +129,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeipA:B:N:m:x:y:z:n:T:S:X:Y:Z:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -176,7 +176,7 @@ int main(int argc, char* argv[]) break; case 'n': N0=atoi(optarg); - break; + break; case 'T': fftw::maxthreads=max(atoi(optarg),1); break; @@ -204,29 +204,29 @@ int main(int argc, char* argv[]) nx=hpadding(mx); ny=hpadding(my); nz=hpadding(mz); - + cout << "nx=" << nx << ", ny=" << ny << ", nz=" << ny << endl; cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl; - + if(N == 0) { N=N0/nx/ny/nz; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); nxp=2*mx-xcompact; nyp=2*my-ycompact; nzp=mz+!zcompact; cout << "nxp=" << nxp << ", nyp" << nyp << ", nzp=" << nzp << endl; - + if(B < 1) B=1; if(B > A) { cerr << "B=" << B << " is not yet implemented for A=" << A << endl; exit(1); } - + Complex **F=new Complex *[A]; for(unsigned int a=0; a < A; ++a) F[a]=ComplexAlign(nxp*nyp*nzp); @@ -242,7 +242,7 @@ int main(int argc, char* argv[]) if(Implicit) { ImplicitHConvolution3 C(mx,my,mz,xcompact,ycompact,zcompact,A,B); cout << "threads=" << C.Threads() << endl << endl; - + realmultiplier *mult; switch(A) { case 2: mult=multbinary; break; @@ -257,11 +257,11 @@ int main(int argc, char* argv[]) // C.convolve(f,g); T[i]=seconds(); } - + timings("Implicit",mx,T,N,stats); if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) for(unsigned int k=0; k < mz; k++) h0[i][j][k]=f[i+!xcompact][j+!ycompact][k]; @@ -277,13 +277,13 @@ int main(int argc, char* argv[]) cout << endl; } } else cout << f[!xcompact][!ycompact][0] << endl; - + } - + if(Direct) { unsigned int nxp=2*mx-1; unsigned int nyp=2*my-1; - + array3 h(nxp,nyp,mz,align); array3 f(nxp,nyp,mz,align); array3 g(nxp,nyp,mz,align); @@ -305,7 +305,7 @@ int main(int argc, char* argv[]) cout << endl; } else cout << h[0][0][0] << endl; - + if(Implicit) { // compare implicit version with direct verion: double error=0.0; double norm=0.0; @@ -323,12 +323,12 @@ int main(int argc, char* argv[]) } } - + delete [] T; for(unsigned int a=0; a < A; ++a) deleteAlign(F[a]); delete [] F; - + return 0; } diff --git a/tests/direct.cc b/tests/direct.cc index 673e4066..a768238e 100644 --- a/tests/direct.cc +++ b/tests/direct.cc @@ -39,7 +39,7 @@ void DirectHConvolution::convolve(Complex *h, Complex *f, Complex *g) for(unsigned int j=1; j < m-i; ++j) sum += conj(f[j])*g[i+j]; h[i]=sum; } -} +} void DirectConvolution2::convolve(Complex *h, Complex *f, Complex *g) { @@ -55,18 +55,18 @@ void DirectConvolution2::convolve(Complex *h, Complex *f, Complex *g) h[i*my+j]=sum; } } -} +} void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g, bool symmetrize) { unsigned int xorigin=mx-1; - + if(symmetrize) { HermitianSymmetrizeX(mx,my,mx-1,f); HermitianSymmetrizeX(mx,my,mx-1,g); } - + int xstart=-(int)xorigin; int ystart=1-(int) my; int xstop=mx; @@ -83,9 +83,9 @@ void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g, if(qx >= xstart && qx < xstop) { int qy=ky-py; if(qy >= ystart && qy < ystop) { - sum += ((py >= 0) ? f[(xorigin+px)*my+py] : + sum += ((py >= 0) ? f[(xorigin+px)*my+py] : conj(f[(xorigin-px)*my-py])) * - ((qy >= 0) ? g[(xorigin+qx)*my+qy] : + ((qy >= 0) ? g[(xorigin+qx)*my+qy] : conj(g[(xorigin-qx)*my-qy])); } } @@ -93,7 +93,7 @@ void DirectHConvolution2::convolve(Complex *h, Complex *f, Complex *g, h[(xorigin+kx)*my+ky]=sum; } } - } + } } void DirectConvolution3::convolve(Complex *h, Complex *f, Complex *g) @@ -113,20 +113,20 @@ void DirectConvolution3::convolve(Complex *h, Complex *f, Complex *g) } } } -} +} -void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g, +void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g, bool symmetrize) { unsigned int xorigin=mx-1; unsigned int yorigin=my-1; unsigned int ny=2*my-1; - + if(symmetrize) { HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,f); HermitianSymmetrizeXY(mx,my,mz,mx-1,my-1,g); } - + int xstart=-(int) xorigin; int ystart=-(int) yorigin; int zstart=1-(int) mz; @@ -149,10 +149,10 @@ void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g, if(qy >= ystart && qy < ystop) { int qz=kz-pz; if(qz >= zstart && qz < zstop) { - sum += ((pz >= 0) ? - f[((xorigin+px)*ny+yorigin+py)*mz+pz] : + sum += ((pz >= 0) ? + f[((xorigin+px)*ny+yorigin+py)*mz+pz] : conj(f[((xorigin-px)*ny+yorigin-py)*mz-pz])) * - ((qz >= 0) ? g[((xorigin+qx)*ny+yorigin+qy)*mz+qz] : + ((qz >= 0) ? g[((xorigin+qx)*ny+yorigin+qy)*mz+qz] : conj(g[((xorigin-qx)*ny+yorigin-qy)*mz-qz])); } } @@ -163,7 +163,7 @@ void DirectHConvolution3::convolve(Complex *h, Complex *f, Complex *g, h[((xorigin+kx)*ny+yorigin+ky)*mz+kz]=sum; } } - } + } } void DirectHTConvolution::convolve(Complex *h, Complex *e, Complex *f, @@ -198,7 +198,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f, HermitianSymmetrizeX(mx,my,mx-1,f); HermitianSymmetrizeX(mx,my,mx-1,g); } - + unsigned int xorigin=mx-1; int xstart=-(int) xorigin; int xstop=mx; @@ -212,7 +212,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f, Complex sum=0.0; for(int px=xstart; px < xstop; ++px) { for(int py=ystart; py < ystop; ++py) { - Complex E=(py >= 0) ? e[(xorigin+px)*my+py] : + Complex E=(py >= 0) ? e[(xorigin+px)*my+py] : conj(e[(xorigin-px)*my-py]); for(int qx=xstart; qx < xstop; ++qx) { for(int qy=ystart; qy < ystop; ++qy) { @@ -221,9 +221,9 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f, int ry=ky-py-qy; if(ry >= ystart && ry < ystop) { sum += E * - ((qy >= 0) ? f[(xorigin+qx)*my+qy] : + ((qy >= 0) ? f[(xorigin+qx)*my+qy] : conj(f[(xorigin-qx)*my-qy])) * - ((ry >= 0) ? g[(xorigin+rx)*my+ry] : + ((ry >= 0) ? g[(xorigin+rx)*my+ry] : conj(g[(xorigin-rx)*my-ry])); } } @@ -233,7 +233,7 @@ void DirectHTConvolution2::convolve(Complex *h, Complex *e, Complex *f, h[(xorigin+kx)*my+ky]=sum; } } - } + } } } diff --git a/tests/direct.h b/tests/direct.h index a68d52b9..1dd9de8a 100644 --- a/tests/direct.h +++ b/tests/direct.h @@ -7,9 +7,9 @@ namespace fftwpp { class DirectConvolution { protected: unsigned int m; -public: +public: DirectConvolution(unsigned int m) : m(m) {} - + void convolve(Complex *h, Complex *f, Complex *g); void autoconvolve(Complex *h, Complex *f); }; @@ -18,9 +18,9 @@ class DirectConvolution { class DirectHConvolution { protected: unsigned int m; -public: +public: DirectHConvolution(unsigned int m) : m(m) {} - + // Compute h= f (*) g via direct convolution, where f and g contain the m // non-negative Fourier components of real functions (contents // preserved). The output of m complex values is returned in the array h, @@ -30,65 +30,65 @@ class DirectHConvolution { // Out-of-place direct 2D complex convolution. class DirectConvolution2 { -protected: +protected: unsigned int mx,my; public: DirectConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my) {} - + void convolve(Complex *h, Complex *f, Complex *g); }; // Out-of-place direct 2D Hermitian convolution. class DirectHConvolution2 { -protected: +protected: unsigned int mx,my; public: DirectHConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my) {} - + void convolve(Complex *h, Complex *f, Complex *g, bool symmetrize=true); }; // Out-of-place direct 3D complex convolution. class DirectConvolution3 { -protected: +protected: unsigned int mx,my,mz; unsigned int myz; public: - DirectConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : + DirectConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : mx(mx), my(my), mz(mz), myz(my*mz) {} - + void convolve(Complex *h, Complex *f, Complex *g); }; // Out-of-place direct 3D Hermitian convolution. class DirectHConvolution3 { -protected: +protected: unsigned int mx,my,mz; public: - DirectHConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : + DirectHConvolution3(unsigned int mx, unsigned int my, unsigned int mz) : mx(mx), my(my), mz(mz) {} - + void convolve(Complex *h, Complex *f, Complex *g, bool symmetrize=true); }; // Out-of-place direct 1D Hermitian ternary convolution. class DirectHTConvolution { -protected: +protected: unsigned int m; public: DirectHTConvolution(unsigned int m) : m(m) {} - + void convolve(Complex *h, Complex *e, Complex *f, Complex *g); }; // Out-of-place direct 2D Hermitian ternary convolution. class DirectHTConvolution2 { -protected: +protected: unsigned int mx,my; public: DirectHTConvolution2(unsigned int mx, unsigned int my) : mx(mx), my(my) {} - + void convolve(Complex *h, Complex *e, Complex *f, Complex *g, bool symmetrize=true); }; diff --git a/tests/explicit.cc b/tests/explicit.cc index c07b1e14..320f241f 100644 --- a/tests/explicit.cc +++ b/tests/explicit.cc @@ -15,22 +15,22 @@ void ExplicitConvolution::backwards(Complex *f) { Backwards->fft(f); } - + void ExplicitConvolution::forwards(Complex *f) { Forwards->fft(f); } - + void ExplicitConvolution::convolve(Complex *f, Complex *g) { pad(f); backwards(f); - + pad(g); backwards(g); - + double ninv=1.0/n; - + Vec Ninv=LOAD(ninv); PARALLEL( for(unsigned int k=0; k < n; ++k) @@ -46,12 +46,12 @@ void ExplicitHConvolution::pad(Complex *f) for(unsigned int i=m; i <= n2; ++i) f[i]=0.0; ); } - + void ExplicitHConvolution::backwards(Complex *f) { cr->fft(f); } - + void ExplicitHConvolution::forwards(Complex *f) { rc->fft(f); @@ -61,19 +61,19 @@ void ExplicitHConvolution::convolve(Complex *f, Complex *g) { pad(f); backwards(f); - + pad(g); backwards(g); - + double *F=(double *) f; double *G=(double *) g; - + double ninv=1.0/n; PARALLEL( for(unsigned int k=0; k < n; ++k) F[k] *= G[k]*ninv; ); - + forwards(f); } @@ -88,7 +88,7 @@ void ExplicitConvolution2::pad(Complex *f) f[j]=0.0; } ); - + // zero pad right-hand block const unsigned int start=mx*ny; const unsigned int stop=nx*ny; @@ -106,7 +106,7 @@ void ExplicitConvolution2::backwards(Complex *f) } else Backwards->fft(f); } - + void ExplicitConvolution2::forwards(Complex *f) { if(prune) { @@ -115,15 +115,15 @@ void ExplicitConvolution2::forwards(Complex *f) } else Forwards->fft(f); } - + void ExplicitConvolution2::convolve(Complex *f, Complex *g) { pad(f); backwards(f); - + pad(g); backwards(g); - + unsigned int n=nx*ny; Vec Ninv=LOAD(1.0/n); PARALLEL( @@ -141,10 +141,10 @@ void ExplicitHConvolution2::pad(Complex *f) // zero pad left block unsigned int stop=(nx2-mx+1)*nyp; PARALLEL( - for(unsigned int i=0; i < stop; ++i) + for(unsigned int i=0; i < stop; ++i) f[i]=0.0; ); - + // zero pad top-middle block unsigned int stop2=stop+2*mx*nyp; unsigned int diff=nyp-my; @@ -154,11 +154,11 @@ void ExplicitHConvolution2::pad(Complex *f) f[j]=0.0; } ); - + // zero pad right block stop=nx*nyp; PARALLEL( - for(unsigned int i=(nx2+mx)*nyp; i < stop; ++i) + for(unsigned int i=(nx2+mx)*nyp; i < stop; ++i) f[i]=0.0; ); } @@ -219,25 +219,25 @@ void ExplicitHConvolution2::convolve(Complex **F, Complex **G, bool symmetrize) { unsigned int xorigin=nx/2; unsigned int nyp=ny/2+1; - + for(unsigned int s=0; s < M; ++s) { Complex *f=F[s]; if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,f); pad(f); backwards(f,false); - + Complex *g=G[s]; if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,g); pad(g); backwards(g,false); } - + double ninv=1.0/(nx*ny); unsigned int nyp2=2*nyp; double *f=(double *) F[0]; double *g=(double *) G[0]; - + if(M == 1) { PARALLEL( for(unsigned int i=0; i < nx; ++i) { @@ -272,7 +272,7 @@ void ExplicitHConvolution2::convolve(Complex **F, Complex **G, bool symmetrize) } ); } - + forwards(F[0]); } @@ -289,7 +289,7 @@ void ExplicitConvolution3::pad(Complex *f) } } ); - + unsigned int nyz=ny*nz; PARALLEL( for(unsigned int i=mx; i < nx; ++i) { @@ -302,7 +302,7 @@ void ExplicitConvolution3::pad(Complex *f) } } ); - + PARALLEL( for(unsigned int i=0; i < nx; ++i) { unsigned int nyzi=nyz*i; @@ -328,7 +328,7 @@ void ExplicitConvolution3::backwards(Complex *f) } else Backwards->fft(f); } - + void ExplicitConvolution3::forwards(Complex *f) { if(prune) { @@ -346,10 +346,10 @@ void ExplicitConvolution3::convolve(Complex *f, Complex *g) { pad(f); backwards(f); - + pad(g); backwards(g); - + unsigned int n=nx*ny*nz; Vec Ninv=LOAD(1.0/n); PARALLEL( @@ -382,23 +382,23 @@ void ExplicitHTConvolution::convolve(Complex *f, Complex *g, Complex *h) { pad(f); backwards(f); - + pad(g); backwards(g); - + pad(h); backwards(h); - + double *F=(double *) f; double *G=(double *) g; double *H=(double *) h; - + double ninv=1.0/n; PARALLEL( for(unsigned int k=0; k < n; ++k) F[k] *= G[k]*H[k]*ninv; ); - + forwards(f); } @@ -415,7 +415,7 @@ void ExplicitHTConvolution2::pad(Complex *f) f[j]=0.0; } ); - + PARALLEL( for(unsigned int i=nx2+mx; i < nx; ++i) { unsigned int nypi=nyp*i; @@ -424,7 +424,7 @@ void ExplicitHTConvolution2::pad(Complex *f) f[j]=0.0; } ); - + PARALLEL( for(unsigned int i=0; i < nx; ++i) { unsigned int nypi=nyp*i; @@ -464,23 +464,23 @@ void ExplicitHTConvolution2::convolve(Complex *f, Complex *g, Complex *h, { unsigned int xorigin=nx/2; unsigned int nyp=ny/2+1; - + if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,f); pad(f); backwards(f,false); - + if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,g); pad(g); backwards(g,false); - + if(symmetrize) HermitianSymmetrizeX(mx,nyp,xorigin,h); pad(h); backwards(h,false); - + double *F=(double *) f; double *G=(double *) g; double *H=(double *) h; - + double ninv=1.0/(nx*ny); unsigned int nyp2=2*nyp; @@ -492,7 +492,7 @@ void ExplicitHTConvolution2::convolve(Complex *f, Complex *g, Complex *h, F[j] *= G[j]*H[j]*ninv; } ); - + forwards(f,false); } diff --git a/tests/explicit.h b/tests/explicit.h index 7fc67c8d..f83b6fbd 100644 --- a/tests/explicit.h +++ b/tests/explicit.h @@ -8,8 +8,8 @@ class ExplicitConvolution : public ThreadBase { protected: unsigned int n,m; fft1d *Backwards,*Forwards; -public: - +public: + // u is a temporary array of size n. ExplicitConvolution(unsigned int n, unsigned int m, Complex *u) : n(n), m(m) { @@ -18,17 +18,17 @@ class ExplicitConvolution : public ThreadBase { threads=Forwards->Threads(); } - + ~ExplicitConvolution() { delete Forwards; delete Backwards; - } - + } + void pad(Complex *f); void backwards(Complex *f); void forwards(Complex *f); - - // Compute f (*) g. The distinct input arrays f and g are each of size n + + // Compute f (*) g. The distinct input arrays f and g are each of size n // (contents not preserved). The output is returned in f. void convolve(Complex *f, Complex *g); }; @@ -49,16 +49,16 @@ class ExplicitHConvolution : public ThreadBase { threads=cr->Threads(); } - + ~ExplicitHConvolution() { delete cr; delete rc; } - + void pad(Complex *f); void backwards(Complex *f); void forwards(Complex *f); - + // Compute f (*) g, where f and g contain the m non-negative Fourier // components of real functions. Dealiasing is internally implemented via // explicit zero-padding to size n >= 3*m. @@ -95,7 +95,7 @@ class ExplicitConvolution2 : public ThreadBase { threads=Forwards->Threads(); } } - + ~ExplicitConvolution2() { if(prune) { delete xForwards; @@ -106,8 +106,8 @@ class ExplicitConvolution2 : public ThreadBase { delete Forwards; delete Backwards; } - } - + } + void pad(Complex *f); void backwards(Complex *f); void forwards(Complex *f); @@ -127,11 +127,11 @@ class ExplicitHConvolution2 : public ThreadBase { mrcfft1d *yForwards; crfft2d *Backwards; rcfft2d *Forwards; - + unsigned int s; Complex *ZetaH,*ZetaL; -public: - ExplicitHConvolution2(unsigned int nx, unsigned int ny, +public: + ExplicitHConvolution2(unsigned int nx, unsigned int ny, unsigned int mx, unsigned int my, Complex *f, unsigned int M=1, bool pruned=false) : @@ -156,13 +156,13 @@ class ExplicitHConvolution2 : public ThreadBase { yForwards=new mrcfft1d(ny,nx,1,1,rdist,cdist,(double*) f); yBackwards=new mcrfft1d(ny,nx,1,1,cdist,rdist,f); } - + } else { Backwards=new crfft2d(nx,ny,f); Forwards=new rcfft2d(nx,ny,f); } } - + ~ExplicitHConvolution2() { if(prune) { delete xForwards; @@ -173,13 +173,13 @@ class ExplicitHConvolution2 : public ThreadBase { delete Forwards; delete Backwards; } - } - + } + void pad(Complex *f); void backwards(Complex *f, bool shift=true); void forwards(Complex *f); void convolve(Complex **F, Complex **G, bool symmetrize=true); - + // Constructor for special case M=1: void convolve(Complex *f, Complex *g, bool symmetrize=true) { convolve(&f,&g,symmetrize); @@ -217,7 +217,7 @@ class ExplicitConvolution3 : public ThreadBase { Forwards=new fft3d(nx,ny,nz,-1,f); } } - + ~ExplicitConvolution3() { if(prune) { delete xForwards; @@ -230,8 +230,8 @@ class ExplicitConvolution3 : public ThreadBase { delete Forwards; delete Backwards; } - } - + } + void pad(Complex *f); void backwards(Complex *f); void forwards(Complex *f); @@ -255,16 +255,16 @@ class ExplicitHTConvolution : public ThreadBase { threads=cr->Threads(); } - + ~ExplicitHTConvolution() { delete cr; delete rc; } - + void pad(Complex *f); void backwards(Complex *f); void forwards(Complex *f); - + // Compute the ternary convolution of f, and g, and h, where f, and g, and h // contain the m non-negative Fourier components of real // functions. Dealiasing is internally implemented via explicit @@ -290,8 +290,8 @@ class ExplicitHTConvolution2 : public ThreadBase { Complex *ZetaH,*ZetaL; unsigned int threads; -public: - ExplicitHTConvolution2(unsigned int nx, unsigned int ny, +public: + ExplicitHTConvolution2(unsigned int nx, unsigned int ny, unsigned int mx, unsigned int my, Complex *f, bool pruned=false) : nx(nx), ny(ny), mx(mx), my(my), prune(pruned) { @@ -303,7 +303,7 @@ class ExplicitHTConvolution2 : public ThreadBase { prune=true; s=BuildZeta(2*nx,nx,ZetaH,ZetaL); } - + if(prune) { xBackwards=new mfft1d(nx,1,My,nyp,1,f); xForwards=new mfft1d(nx,-1,My,nyp,1,f); @@ -321,7 +321,7 @@ class ExplicitHTConvolution2 : public ThreadBase { threads=Forwards->Threads(); } } - + ~ExplicitHTConvolution2() { if(prune) { delete xForwards; @@ -332,8 +332,8 @@ class ExplicitHTConvolution2 : public ThreadBase { delete Forwards; delete Backwards; } - } - + } + void pad(Complex *f); void backwards(Complex *f, bool shift=true); void forwards(Complex *f, bool shift=true); diff --git a/tests/fft1.cc b/tests/fft1.cc index f60b8c25..ac04d85f 100644 --- a/tests/fft1.cc +++ b/tests/fft1.cc @@ -20,10 +20,10 @@ int main(int argc, char* argv[]) fftw::maxthreads=get_max_threads(); int r=-1; - -#ifdef __GNUC__ + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"N:m:x:r:T:S:h"); if (c == -1) break; @@ -57,17 +57,17 @@ int main(int argc, char* argv[]) size_t align=sizeof(Complex); - + array1 f(m,align); - + array1 g(m,align); - + fft1d Forward(-1,f); fft1d Backward(1,f); fft1d Forward0(-1,f,g); fft1d Backward0(1,g,f); - + for(unsigned int i=0; i < m; i++) f[i]=i; double *T=new double[N]; diff --git a/tests/fft1r.cc b/tests/fft1r.cc index 878092bf..3e6d73b7 100644 --- a/tests/fft1r.cc +++ b/tests/fft1r.cc @@ -25,10 +25,10 @@ int main(int argc, char* argv[]) fftw::maxthreads=get_max_threads(); int r=-1; - -#ifdef __GNUC__ + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c=getopt(argc,argv,"N:m:x:r:T:S:h"); if (c == -1) break; @@ -63,10 +63,10 @@ int main(int argc, char* argv[]) size_t align=sizeof(Complex); unsigned int mp=m/2+1; - + array1 f(m+2,align); array1 g(mp,align); - + double *T=new double[N]; if(r == -1 || r == 0) { diff --git a/tests/fft2.cc b/tests/fft2.cc index bbac6ca9..edac04e4 100644 --- a/tests/fft2.cc +++ b/tests/fft2.cc @@ -16,13 +16,13 @@ unsigned int my=4; bool Direct=false, Implicit=true, Explicit=false, Pruned=false; -inline void init(array2& f) +inline void init(array2& f) { for(unsigned int i=0; i < mx; ++i) for(unsigned int j=0; j < my; j++) f[i][j]=Complex(i,j); } - + unsigned int outlimit=100; int main(int argc, char* argv[]) @@ -34,15 +34,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hN:m:x:y:n:T:S:r:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -80,13 +80,13 @@ int main(int argc, char* argv[]) if(my == 0) my=mx; cout << "mx=" << mx << ", my=" << my << endl; - + if(N == 0) { N=N0/mx/my; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); array2 f(mx,my,align); @@ -97,7 +97,7 @@ int main(int argc, char* argv[]) if(r == -1 || r == 0) { // conventional FFT, in-place fft2d Forward2(-1,f); fft2d Backward2(1,f); - + for(unsigned int i=0; i < N; ++i) { init(f); seconds(); @@ -108,12 +108,12 @@ int main(int argc, char* argv[]) } timings("fft2d, in-place",mx,T,N,stats); } - + if(r == -1 || r == 1) { // conventional FFT, out-of-place fft2d Forward2(-1,f,g); fft2d Backward2(1,f,g); - + for(unsigned int i=0; i < N; ++i) { init(f); seconds(); @@ -129,7 +129,7 @@ int main(int argc, char* argv[]) Transpose Txy(mx,my,1,f(),f(),fftw::maxthreads); Transpose Tyx(my,mx,1,f(),f(),fftw::maxthreads); - + mfft1d Forwardx(mx,-1,my,1,my); mfft1d Backwardx(mx,1,my,1,my); @@ -158,7 +158,7 @@ int main(int argc, char* argv[]) Transpose Txy(mx,my,1,f(),g(),fftw::maxthreads); Transpose Tyx(my,mx,1,f(),g(),fftw::maxthreads); - + mfft1d Forwardx(mx,-1,my,1,my,f,g); mfft1d Backwardx(mx,1,my,1,my,f,g); @@ -182,12 +182,12 @@ int main(int argc, char* argv[]) } timings("transpose and mfft, out-of-place",mx,T,N,stats); } - + if(r == -1 || r == 4) { // full transpose, in-place Transpose Txy(mx,my,1,f(),f(),fftw::maxthreads); Transpose Tyx(my,mx,1,f(),f(),fftw::maxthreads); - + mfft1d Forwardx(mx,-1,my,1,my); mfft1d Backwardx(mx,1,my,1,my); @@ -219,7 +219,7 @@ int main(int argc, char* argv[]) Transpose Txy(mx,my,1,f(),g(),fftw::maxthreads); Transpose Tyx(my,mx,1,f(),g(),fftw::maxthreads); - + mfft1d Forwardx(mx,-1,my,1,my,f,g); mfft1d Backwardx(mx,1,my,1,my,f,g); @@ -233,9 +233,9 @@ int main(int argc, char* argv[]) Forwardy.fft(f,g); Txy.transpose(g(),f()); Forwardx.fft(f,g); - + Tyx.transpose(g(),f()); - + Txy.transpose(f(),g()); Backwardx.fft(g,f); @@ -247,7 +247,7 @@ int main(int argc, char* argv[]) } timings("2 transposes and mfft, out-of-place",mx,T,N,stats); } - + if(r == -1 || r == 6) { // using strides, in-place mfft1d Forwardx(mx,-1,my,my,1,f,f); @@ -296,7 +296,7 @@ int main(int argc, char* argv[]) } timings("strided mfft out-of-place",mx,T,N,stats); } - + cout << endl; if(mx*my < outlimit) { for(unsigned int i=0; i < mx; i++) { @@ -306,9 +306,9 @@ int main(int argc, char* argv[]) } } else cout << f[0][0] << endl; - + delete [] T; - + return 0; } diff --git a/tests/fft2r.cc b/tests/fft2r.cc index 9428a573..21e3de5d 100644 --- a/tests/fft2r.cc +++ b/tests/fft2r.cc @@ -23,19 +23,19 @@ int main(int argc, char* argv[]) unsigned int nx=4; unsigned int ny=5; - + int N=1000; unsigned int stats=MEAN; // Type of statistics used in timing test. bool inplace=false; bool shift=false; bool quiet=false; - + fftw::maxthreads=get_max_threads(); - -#ifdef __GNUC__ + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c=getopt(argc,argv,"N:i:m:s:x:y:T:S:hq"); if (c == -1) break; @@ -79,15 +79,15 @@ int main(int argc, char* argv[]) size_t align=sizeof(Complex); unsigned int nyp=ny/2+1; - + array2 g(nx,nyp,align); array2 f; - + if(inplace) f.Dimension(nx,2*nyp,(double *) g()); else f.Allocate(nx,ny,align); - + rcfft2d Forward(nx,ny,f,g); crfft2d Backward(nx,ny,g,f); @@ -120,7 +120,7 @@ int main(int argc, char* argv[]) cout << endl; } } - + double *T= new double[N]; for(int i=0; i < N; ++i) { @@ -141,5 +141,5 @@ int main(int argc, char* argv[]) } timings("fft2 out-of-place",nx,T,N,stats); - + } diff --git a/tests/fft3.cc b/tests/fft3.cc index 9daa57ec..e50f098f 100644 --- a/tests/fft3.cc +++ b/tests/fft3.cc @@ -17,14 +17,14 @@ unsigned int mz = 4; bool Direct=false, Implicit=true, Explicit=false, Pruned=false; -inline void init(array3& f) +inline void init(array3& f) { for(unsigned int i = 0; i < mx; ++i) for(unsigned int j = 0; j < my; j++) - for(unsigned int k = 0; k < mz; k++) + for(unsigned int k = 0; k < mz; k++) f(i, j, k)=Complex(10 * k + i, j); } - + unsigned int outlimit = 100; int main(int argc, char* argv[]) @@ -36,15 +36,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hN:m:x:y:z:n:T:S:r:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -86,13 +86,13 @@ int main(int argc, char* argv[]) my = mx; cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl; - + if(N == 0) { N = N0 / mx / my; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align = sizeof(Complex); array3 f(mx, my, mz, align); @@ -103,7 +103,7 @@ int main(int argc, char* argv[]) if(r == -1 || r == 0) { // conventional FFT, in-place fft3d Forward3(-1, f); fft3d Backward3(1, f); - + for(unsigned int i = 0; i < N; ++i) { init(f); seconds(); @@ -114,11 +114,11 @@ int main(int argc, char* argv[]) } timings("fft3d, in-place", mx, T, N, stats); } - + if(r == -1 || r == 1) { // conventional FFT, out-of-place fft3d Forward3(-1, f, g); fft3d Backward3(1, f, g); - + for(unsigned int i = 0; i < N; ++i) { init(f); seconds(); @@ -131,7 +131,7 @@ int main(int argc, char* argv[]) } delete [] T; - + return 0; } diff --git a/tests/fft3r.cc b/tests/fft3r.cc index 8ad65fe3..7e73959a 100644 --- a/tests/fft3r.cc +++ b/tests/fft3r.cc @@ -10,9 +10,9 @@ using namespace fftwpp; void finit(array3 f, unsigned int nx, unsigned int ny, unsigned int nz) { - for(unsigned int i=0; i < nx; ++i) + for(unsigned int i=0; i < nx; ++i) for(unsigned int j=0; j < ny; ++j) - for(unsigned int k=0; k < nz; ++k) + for(unsigned int k=0; k < nz; ++k) f(i,j,k)=i+j+k; } @@ -30,12 +30,12 @@ int main(int argc,char* argv[]) bool inplace=false; bool shift=false; bool quiet=false; - + fftw::maxthreads=get_max_threads(); - -#ifdef __GNUC__ + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c=getopt(argc,argv,"N:i:m:qx:y:z:T:S:h"); if (c == -1) break; @@ -80,12 +80,12 @@ int main(int argc,char* argv[]) } size_t align=sizeof(Complex); - + unsigned int nzp=nz/2+1; - + array3 g(nx,ny,nzp,align); array3 f; - + if(inplace) f.Dimension(nx,ny,2*nzp,(double *) g()); else @@ -93,7 +93,7 @@ int main(int argc,char* argv[]) rcfft3d Forward(nx,ny,nz,f,g); crfft3d Backward(nx,ny,nz,g,f); - + if(!quiet) { finit(f,nx,ny,nz); cout << endl << "Input:" << endl; @@ -127,7 +127,7 @@ int main(int argc,char* argv[]) } double *T=new double[N]; - + for(int i=0; i < N; ++i) { finit(f,nx,ny,nz); if(shift) { @@ -145,5 +145,5 @@ int main(int argc,char* argv[]) } } timings("fft3 out-of-place",nx,T,N,stats); - + } diff --git a/tests/mfft1.cc b/tests/mfft1.cc index dda02a07..1a04c93c 100644 --- a/tests/mfft1.cc +++ b/tests/mfft1.cc @@ -13,13 +13,13 @@ unsigned int N0=10000000; bool Direct=false, Implicit=true, Explicit=false, Pruned=false; -inline void init(array2& f, unsigned int mx, unsigned int my) +inline void init(array2& f, unsigned int mx, unsigned int my) { for(unsigned int i=0; i < mx; ++i) for(unsigned int j=0; j < my; j++) f[i][j]=Complex(i,j); } - + unsigned int outlimit=100; int main(int argc, char* argv[]) @@ -34,15 +34,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hN:m:x:y:n:T:S:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -77,18 +77,18 @@ int main(int argc, char* argv[]) if(my == 0) my=mx; cout << "mx=" << mx << ", my=" << my << endl; - + if(N == 0) { N=N0/mx/my; N = max(N, 20); } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); array2 f(mx,my,align); array2 g(mx,my,align); - + mfft1d Forward(my,-1,mx,1,my); mfft1d Backward(my,1,mx,1,my); @@ -112,7 +112,7 @@ int main(int argc, char* argv[]) cout << f[i][j] << "\t"; cout << endl; } - } else { + } else { cout << f[0][0] << endl; } @@ -124,11 +124,11 @@ int main(int argc, char* argv[]) cout << f[i][j] << "\t"; cout << endl; } - } else { + } else { cout << f[0][0] << endl; } - + cout << endl; double *T=new double[N]; for(unsigned int i=0; i < N; ++i) { @@ -141,7 +141,7 @@ int main(int argc, char* argv[]) } timings("mfft1, in-place",mx,T,N,stats); delete [] T; - + return 0; } diff --git a/tests/mfft1r.cc b/tests/mfft1r.cc index 3bd30dda..61485a34 100644 --- a/tests/mfft1r.cc +++ b/tests/mfft1r.cc @@ -9,13 +9,13 @@ using namespace Array; using namespace fftwpp; -inline void init(array2& f, unsigned int mx, unsigned int my) +inline void init(array2& f, unsigned int mx, unsigned int my) { for(unsigned int i=0; i < mx; ++i) for(unsigned int j=0; j < my; j++) f[i][j] = 10 * i + j; } - + unsigned int outlimit=100; int main(int argc, char* argv[]) @@ -35,11 +35,11 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hN:m:x:y:n:T:S:"); if (c == -1) break; @@ -87,7 +87,7 @@ int main(int argc, char* argv[]) if(N < 10) N=10; } cout << "N=" << N << endl; - + size_t align=sizeof(Complex); array2 f(mx,my,align); @@ -97,7 +97,7 @@ int main(int argc, char* argv[]) size_t cstride=1; size_t rdist=mx; size_t cdist=np; - + mrcfft1d Forward(mx, // length of transform M, // number of transforms rstride, @@ -150,7 +150,7 @@ int main(int argc, char* argv[]) } timings("mfft1r, in-place",mx,T,N,stats); delete [] T; - } + } return 0; } diff --git a/tests/tconv.cc b/tests/tconv.cc index d93e003a..d019cc13 100644 --- a/tests/tconv.cc +++ b/tests/tconv.cc @@ -14,10 +14,10 @@ unsigned int N=0; unsigned int m=12; unsigned int M=1; unsigned int B=1; - + bool Direct=false, Implicit=true, Explicit=false; -inline void init(Complex *e, Complex *f, Complex *g, unsigned int M=1) +inline void init(Complex *e, Complex *f, Complex *g, unsigned int M=1) { unsigned int m1=m+1; unsigned int Mm=M*m1; @@ -47,15 +47,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeipA:B:N:m:n:T:S:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) break; case 'T': fftw::maxthreads=max(atoi(optarg),1); - break; + break; case 'S': stats=atoi(optarg); break; @@ -105,25 +105,25 @@ int main(int argc, char* argv[]) cout << "n=" << n << endl; cout << "m=" << m << endl; - + if(N == 0) { N=N0/n; N = max(N, 20); } cout << "N=" << N << endl; - + Complex *h0=NULL; if(Direct && ! Explicit) h0=ComplexAlign(m); unsigned int m1=m+1; unsigned int np=Explicit ? n/2+1 : m1; if(Implicit) np *= M; - + if(B != 1) { cerr << "B=" << B << " is not yet implemented" << endl; exit(1); } - + Complex *e=ComplexAlign(np); Complex *f=ComplexAlign(np); Complex *g=ComplexAlign(np); @@ -149,20 +149,20 @@ int main(int argc, char* argv[]) // C.convolve(e,f,g); T[i]=seconds(); } - + timings("Implicit",m,T,N,stats); if(Direct) for(unsigned int i=0; i < m; i++) h0[i]=e[i]; - if(m < 100) + if(m < 100) for(unsigned int i=0; i < m; i++) cout << e[i] << endl; else cout << e[0] << endl; - + delete [] G; delete [] F; delete [] E; } - + if(Explicit) { ExplicitHTConvolution C(n,m,f); for(unsigned int i=0; i < N; ++i) { @@ -171,14 +171,14 @@ int main(int argc, char* argv[]) C.convolve(e,f,g); T[i]=seconds(); } - + timings("Explicit",m,T,N,stats); - if(m < 100) + if(m < 100) for(unsigned int i=0; i < m; i++) cout << e[i] << endl; else cout << e[0] << endl; } - + if(Direct) { DirectHTConvolution C(m); init(e,f,g); @@ -186,10 +186,10 @@ int main(int argc, char* argv[]) seconds(); C.convolve(h,e,f,g); T[0]=seconds(); - + timings("Direct",m,T,1); - if(m < 100) + if(m < 100) for(unsigned int i=0; i < m; i++) cout << h[i] << endl; else cout << h[0] << endl; @@ -212,7 +212,7 @@ int main(int argc, char* argv[]) deleteAlign(g); deleteAlign(f); deleteAlign(e); - + delete [] T; return 0; diff --git a/tests/tconv2.cc b/tests/tconv2.cc index eb7b24dc..7acfac20 100644 --- a/tests/tconv2.cc +++ b/tests/tconv2.cc @@ -27,7 +27,7 @@ bool Direct=false, Implicit=true, Explicit=false, Pruned=false; unsigned int outlimit=100; inline void init(Array2& e, Array2& f, Array2& g, - unsigned int M=1) + unsigned int M=1) { unsigned int offset=Explicit ? nx/2-mx+1 : 0; unsigned int stop=2*mx-1; @@ -58,15 +58,15 @@ int main(int argc, char* argv[]) #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hdeiptA:B:N:m:x:y:n:T:S:"); if (c == -1) break; - + switch (c) { case 0: break; @@ -124,21 +124,21 @@ int main(int argc, char* argv[]) nx=tpadding(mx); ny=tpadding(my); - + cout << "nx=" << nx << ", ny=" << ny << endl; cout << "mx=" << mx << ", my=" << my << endl; - + if(N == 0) { N=N0/nx/ny; N = max(N, 20); } cout << "N=" << N << endl; - + if(B != 1) { cerr << "B=" << B << " is not yet implemented" << endl; exit(1); } - + size_t align=sizeof(Complex); array2 h0; if(Direct) h0.Allocate(mx,my,align); @@ -172,11 +172,11 @@ int main(int argc, char* argv[]) // C.convolve(e,f,g); T[i]=seconds(); } - + timings("Implicit",mx,T,N,stats); - + if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) h0[i][j]=e[i][j]; } @@ -188,12 +188,12 @@ int main(int argc, char* argv[]) cout << endl; } else cout << e[1][0] << endl; cout << endl; - + delete [] G; delete [] F; delete [] E; } - + if(Explicit) { ExplicitHTConvolution2 C(nx,ny,mx,my,f,Pruned); for(unsigned int i=0; i < N; ++i) { @@ -202,24 +202,24 @@ int main(int argc, char* argv[]) C.convolve(e,f,g); T[i]=seconds(); } - + timings(Pruned ? "Pruned" : "Explicit",mx,T,N,stats); if(Direct) { - for(unsigned int i=0; i < mx; i++) + for(unsigned int i=0; i < mx; i++) for(unsigned int j=0; j < my; j++) h0[i][j]=e[i][j]; } unsigned int offset=nx/2-mx+1; - if(2*(mx-1)*my < outlimit) + if(2*(mx-1)*my < outlimit) for(unsigned int i=offset; i < offset+2*mx-1; i++) { for(unsigned int j=0; j < my; j++) cout << e[i][j] << "\t"; cout << endl; } else cout << e[offset][0] << endl; } - + if(Direct) { Explicit=false; unsigned int nxp=2*mx-1; @@ -232,7 +232,7 @@ int main(int argc, char* argv[]) seconds(); C.convolve(h,e,f,g); T[0]=seconds(); - + timings("Direct",mx,T,1); if(nxp*my < outlimit) @@ -258,7 +258,7 @@ int main(int argc, char* argv[]) } } - + delete [] T; return 0; diff --git a/tests/timing.h b/tests/timing.h index 6ba3d278..7843dc5b 100644 --- a/tests/timing.h +++ b/tests/timing.h @@ -15,7 +15,7 @@ seconds(); T[i]=seconds(); } - for(unsigned int i=0; i < N; ++i) + for(unsigned int i=0; i < N; ++i) val += T[i]; return val/N; } @@ -23,10 +23,10 @@ enum timing_algorithm {WRITETOFILE = -1, MEAN, MIN, MAX, MEDIAN, P90, P80, P50}; -inline double mean(double *T, unsigned int N, int algorithm) +inline double mean(double *T, unsigned int N, int algorithm) { switch(algorithm) { - case WRITETOFILE: + case WRITETOFILE: case MEAN: { double sum=0.0; for(unsigned int i=0; i < N; ++i) @@ -61,7 +61,7 @@ inline double mean(double *T, unsigned int N, int algorithm) std::sort(T,T+N); unsigned int start=(int)ceil(N*0.05); unsigned int stop=(int)floor(N*0.95); - + double sum=0.0; for(unsigned int i=start; i < stop; ++i) sum += T[i]; @@ -72,7 +72,7 @@ inline double mean(double *T, unsigned int N, int algorithm) std::sort(T,T+N); unsigned int start=(int)ceil(N*0.1); unsigned int stop=(int)floor(N*0.9); - + double sum=0.0; for(unsigned int i=start; i < stop; ++i) sum += T[i]; @@ -83,7 +83,7 @@ inline double mean(double *T, unsigned int N, int algorithm) std::sort(T,T+N); unsigned int start=(int)ceil(N*0.25); unsigned int stop=(int)floor(N*0.75); - + double sum=0.0; for(unsigned int i=start; i < stop; ++i) sum += T[i]; @@ -91,7 +91,7 @@ inline double mean(double *T, unsigned int N, int algorithm) break; } default: - std::cout << "Error: invalid algorithm choice: " + std::cout << "Error: invalid algorithm choice: " << algorithm << std::endl; exit(1); @@ -99,12 +99,12 @@ inline double mean(double *T, unsigned int N, int algorithm) return 0; } -inline void stdev(double *T, unsigned int N, double mean, +inline void stdev(double *T, unsigned int N, double mean, double &sigmaL, double& sigmaH, - int algorithm) + int algorithm) { switch(algorithm) { - case WRITETOFILE: + case WRITETOFILE: case MEAN: { sigmaL=0.0, sigmaH=0.0; for(unsigned int i=0; i < N; ++i) { @@ -114,7 +114,7 @@ inline void stdev(double *T, unsigned int N, double mean, if(v > 0) sigmaH += v*v; } - double factor=N > 2 ? 2.0/(N-2.0) : 0.0; + double factor=N > 2 ? 2.0/(N-2.0) : 0.0; sigmaL=sqrt(sigmaL*factor); sigmaH=sqrt(sigmaH*factor); } @@ -156,7 +156,7 @@ inline void stdev(double *T, unsigned int N, double mean, } break; default: - std::cout << "Error: invalid algorithm choice: " + std::cout << "Error: invalid algorithm choice: " << algorithm << std::endl; exit(1); @@ -168,15 +168,15 @@ inline void timings(const char* text, unsigned int m, unsigned int count, { // mean -= emptytime(T,N); // if(mean < 0.0) mean=0.0; - std::cout << text << ":\n" - << m << "\t" - << mean << "\t" - << sigmaL << "\t" - << sigmaH + std::cout << text << ":\n" + << m << "\t" + << mean << "\t" + << sigmaL << "\t" + << sigmaH << std::endl; } -inline void timings(const char* text, unsigned int m, double *T, +inline void timings(const char* text, unsigned int m, double *T, unsigned int N, int algorithm=MEAN) { double sigmaL=0.0, sigmaH=0.0; diff --git a/tests/transpose.cc b/tests/transpose.cc index 1bbd2a52..077816d3 100644 --- a/tests/transpose.cc +++ b/tests/transpose.cc @@ -15,7 +15,7 @@ unsigned int mx=4; unsigned int my=4; unsigned int mz=4; -inline void init(array3& f, bool transpose=false) +inline void init(array3& f, bool transpose=false) { for(unsigned int i=0; i < mx; ++i) { for(unsigned int j=0; j < my; ++j) { @@ -29,7 +29,7 @@ inline void init(array3& f, bool transpose=false) } } } - + unsigned int outlimit=100; int main(int argc, char* argv[]) @@ -39,18 +39,18 @@ int main(int argc, char* argv[]) int stats=0; // Type of statistics used in timing test. bool inplace = true; - + #ifndef __SSE2__ fftw::effort |= FFTW_NO_SIMD; -#endif - -#ifdef __GNUC__ +#endif + +#ifdef __GNUC__ optind=0; -#endif +#endif for (;;) { int c = getopt(argc,argv,"hN:m:x:y:z:n:T:S:i:d"); if (c == -1) break; - + switch (c) { case 0: break; @@ -92,13 +92,13 @@ int main(int argc, char* argv[]) my=mx; cout << "mx=" << mx << ", my=" << my << ", mz=" << mz << endl; - + cout << "N=" << N << endl; cout << "inplace=" << inplace << endl; - + cout << "threads=" << fftw::maxthreads << endl; - + size_t align=sizeof(Complex); array3 f(mx,my,mz,align); @@ -111,17 +111,17 @@ int main(int argc, char* argv[]) init(f); cout << "Input:" << endl; - if(mx*my*mz < outlimit) + if(mx*my*mz < outlimit) cout << f << endl; - else + else cout << f[0][0][0] << endl; - transpose.transpose(f(),g()); + transpose.transpose(f(),g()); cout << "Output:" << endl; - if(mx*my*mz < outlimit) + if(mx*my*mz < outlimit) cout << g << endl; - else + else cout << g[0][0][0] << endl; array3 f0(my,mx,mz,align); @@ -130,7 +130,7 @@ int main(int argc, char* argv[]) for(unsigned int i = 0; i < my; ++i) { for(unsigned int j = 0; j < mx; ++j) { for(unsigned int k = 0; k < mz; ++k) { - errmax = max(errmax,abs(g[i][j][k]-f0[i][j][k])); + errmax = max(errmax,abs(g[i][j][k]-f0[i][j][k])); } } } @@ -142,7 +142,7 @@ int main(int argc, char* argv[]) } else { double *T=new double[N]; - + for(unsigned int i=0; i < N; ++i) { init(f); seconds(); @@ -152,11 +152,11 @@ int main(int argc, char* argv[]) timings("transpose",mx,T,N,stats); delete [] T; - } + } if(!inplace) delete pg; - + return 0; } diff --git a/tests/utils.h b/tests/utils.h index 7b4294fb..1503dd95 100644 --- a/tests/utils.h +++ b/tests/utils.h @@ -5,10 +5,10 @@ #include "seconds.h" #include "timing.h" #include "Complex.h" - + #ifdef _WIN32 #include "getopt.h" -inline double cbrt(double x) +inline double cbrt(double x) { if(x == 0.0) return 0.0; static double third=1.0/3.0; @@ -25,7 +25,7 @@ inline T max(const T a, const S b) { return a > (T) b ? a : b; } - + inline void usageCommon(int n) { std::cerr << "Options: " << std::endl; @@ -33,9 +33,9 @@ inline void usageCommon(int n) std::cerr << "-T\t\t number of threads" << std::endl; std::cerr << "-N\t\t number of iterations" << std::endl; std::cerr << "-m\t\t size" << std::endl; - std::cerr << "-S\t\t stats used in timing test: " + std::cerr << "-S\t\t stats used in timing test: " << "0=mean, 1=min, 2=max, 3=median, " - << "4=90th percentile, 5=80th percentile, 6=50th percentile" + << "4=90th percentile, 5=80th percentile, 6=50th percentile" << std::endl; if(n > 1) { std::cerr << "-x\t\t x size" << std::endl; @@ -43,7 +43,7 @@ inline void usageCommon(int n) } if(n > 2) std::cerr << "-z\t\t z size" << std::endl; -} +} inline void usageDirect() { @@ -56,20 +56,20 @@ inline void usage(int n) usageCommon(n); std::cerr << "-A\t\t number of data blocks in input" << std::endl; std::cerr << "-B\t\t number of data blocks in output" << std::endl; -} +} inline void usageInplace(int n) { usageCommon(n); std::cerr << "-i\t\t 0=out-of-place, 1=in-place" << std::endl; -} +} -inline void usageTest() +inline void usageTest() { std::cerr << "-t\t\t accuracy test" << std::endl; } - -inline void usageExplicit(unsigned int n) + +inline void usageExplicit(unsigned int n) { usageDirect(); std::cerr << "-e\t\t explicitly padded convolution" << std::endl; @@ -144,7 +144,7 @@ inline unsigned int ceilpow2(unsigned int n) n |= n >> 16; return ++n; } - + inline unsigned int padding(unsigned int n) { std::cout << "min padded buffer=" << n << std::endl;