Skip to content

Commit

Permalink
Implement running median in statistics.h;
Browse files Browse the repository at this point in the history
use median time for optimization;
optionally normalize within mult routine.
  • Loading branch information
johncbowman committed Aug 27, 2022
1 parent a15bb75 commit 0feef43
Show file tree
Hide file tree
Showing 11 changed files with 124 additions and 47 deletions.
38 changes: 36 additions & 2 deletions statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define __statistics_h__ 1

#include <cfloat>
#include <list>

namespace utils {

Expand All @@ -12,9 +13,11 @@ class statistics {
double varH;
double m; // min
double M; // max
std::list<double> T; // Only used if constructed with median=true
bool Median;
public:
statistics() {clear();}
void clear() {N=0; A=varL=varH=0.0; m=DBL_MAX; M=-m;}
statistics(bool median=false) : Median(median) {clear();}
void clear() {N=0; A=varL=varH=0.0; m=DBL_MAX; M=-m; T.clear();}
double count() {return N;}
double mean() {return A;}
double min() {return m;}
Expand All @@ -30,6 +33,27 @@ class statistics {
varL += v;
else
varH += v;

if(Median) {
auto p=T.begin();
if(N == 1 || t <= *p) T.push_front(t);
else {
size_t l=0;
size_t u=N-1;
ssize_t last=0;
while(l < u) {
ssize_t i=(l+u)/2;
std::advance(p,i-last);
last=i;
if(t < *p) u=i;
else {
++p;
if(p == T.end() || t <= *p) {T.insert(p,t); break;}
last=l=i+1;
}
}
}
}
}
double stdev(double var, double f) {
double factor=N > f ? f/(N-f) : 0.0;
Expand All @@ -44,6 +68,16 @@ class statistics {
double stdevH() {
return stdev(varH,2.0);
}
double median() {
if(!Median) {
std::cerr << "Constructor requires median=true" << std::endl;
exit(-1);
}
unsigned int h=N/2;
auto p=T.begin();
std::advance(p,h-1);
return 2*h == N ? 0.5*(*p+*(++p)) : *(++p);
}
void output(const char *text, unsigned int m) {
std::cout << text << ":\n"
<< m << "\t"
Expand Down
4 changes: 2 additions & 2 deletions tests/cconv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ int main(int argc, char* argv[])

timings("Implicit",m,T,N,stats);

if(m < 100) {
if(m < 100 && false) {
for(unsigned int b=0; b < B; ++b) {
for(unsigned int i=0; i < m; i++)
cout << F[b][i] << endl;
Expand Down Expand Up @@ -252,7 +252,7 @@ int main(int argc, char* argv[])
cout << endl;
timings("Explicit",m,T,N,stats);

if(m < 100)
if(m < 100 && false)
for(unsigned int i=0; i < m; i++)
cout << F[0][i] << endl;
else cout << F[0][0] << endl;
Expand Down
2 changes: 1 addition & 1 deletion tests/conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ int main(int argc, char* argv[])
cout << endl;
timings("Explicit",2*m-1,T,N,stats);

if(m < 100 && false)
if(m < 100)
for(unsigned int i=0; i < m; i++)
cout << f[i] << endl;
else
Expand Down
27 changes: 14 additions & 13 deletions tests/convolve.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ void fftBase::OptBase::check(unsigned int L, unsigned int M,
if(q == 1 || valid(D,p,S)) {
// cout << "D=" << D << ", m=" << m << endl;
double t=time(L,M,C,S,m,q,D,app);
// cout << p << " " << q << ": " << t << endl;
if(t < T) {
this->m=m;
this->q=q;
Expand Down Expand Up @@ -276,28 +277,28 @@ fftBase::~fftBase()
deleteAlign(ZetaqmS0);
}

double fftBase::mintime(Application& app, double *Stdev)
double fftBase::medianTime(Application& app, double *Stdev)
{
unsigned int K=1;
double eps=0.1;

statistics Stats;
statistics Stats(true);
app.init(*this);
static ratio r=chrono::steady_clock::period();
double threshold=5000*r.num/r.den;
while(true) {
Stats.add(app.time(*this,K));
double mean=Stats.mean();
double min=Stats.min();
double t=app.time(*this,K)/K;
Stats.add(t);
double stdev=Stats.stdev();
if(Stats.count() < 7) continue;
int threshold=5000;
ratio r=chrono::steady_clock::period();
if(min*r.den < threshold*r.num || eps*mean < stdev) {
double value=Stats.median();
if(value < threshold || eps*value < stdev) {
K *= 2;
Stats.clear();
} else {
if(Stdev) *Stdev=stdev/K;
if(Stdev) *Stdev=stdev;
app.clear();
return min/K;
return value;
}
}
return 0.0;
Expand All @@ -308,11 +309,11 @@ double fftBase::report(Application& app)
double stdev;
cout << endl;

double min=mintime(app,&stdev);
double median=medianTime(app,&stdev);

cout << "min=" << min << " stdev=" << stdev << endl;
cout << "median=" << median << " stdev=" << stdev << endl;

return min;
return median;
}

void fftBase::common()
Expand Down
8 changes: 4 additions & 4 deletions tests/convolve.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ class fftBase : public ThreadBase {
return overwrite && A >= B;
}

double mintime(Application& app, double *Stdev=NULL);
double medianTime(Application& app, double *Stdev=NULL);
double report(Application& app);
};

Expand All @@ -412,7 +412,7 @@ class fftPad : public fftBase {
unsigned int m, unsigned int q,unsigned int D,
Application &app) {
fftPad fft(L,M,C,S,m,q,D,app.Threads(),false);
return fft.mintime(app);
return fft.medianTime(app);
}
};

Expand Down Expand Up @@ -512,7 +512,7 @@ class fftPadCentered : public fftPad {
unsigned int m, unsigned int q, unsigned int D,
Application &app) {
fftPadCentered fft(L,M,C,S,m,q,D,app.Threads());
return fft.mintime(app);
return fft.medianTime(app);
}
};

Expand Down Expand Up @@ -617,7 +617,7 @@ class fftPadHermitian : public fftBase {
unsigned int m, unsigned int q, unsigned int D,
Application &app) {
fftPadHermitian fft(L,M,C,m,q,D,app.Threads());
return fft.mintime(app);
return fft.medianTime(app);
}
};

Expand Down
27 changes: 18 additions & 9 deletions tests/explicit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@

namespace fftwpp {

// This multiplication routine is for binary convolutions and takes two inputs
// of size m.
// F[0][j] *= F[1][j];
void multbinary(Complex **F, unsigned int m, unsigned int threads)
{
Complex* F0=F[0];
Complex* F1=F[1];

double ninv=1.0/m;

PARALLEL(
for(unsigned int j=0; j < m; ++j)
F0[j] *= ninv*F1[j];
);
}

// This multiplication routine is for binary Hermitian convolutions and takes
// two inputs.
// F[0][j] *= F[1][j];
Expand All @@ -30,7 +46,7 @@ void ExplicitConvolution::forwards(Complex *f)
Forwards->fft(f);
}

void ExplicitConvolution::convolve(Complex **F, multiplier *mult)
void ExplicitConvolution::convolve(Complex **F, Multiplier *mult)
{
const unsigned int A=2;
const unsigned int B=1;
Expand All @@ -40,17 +56,10 @@ void ExplicitConvolution::convolve(Complex **F, multiplier *mult)
backwards(F[a]);
}

(*mult)(F,n,0,NULL,0,threads);
(*mult)(F,n,threads);

for(unsigned int b=0; b < B; ++b)
forwards(F[b]);

double ninv=1.0/n;
for(unsigned int b=0; b < B; ++b) {
Complex *f=F[b];
for(unsigned int j=0; j < m; ++j)
f[j] *= ninv;
}
}

void ExplicitConvolution::convolve(Complex *f, Complex *g)
Expand Down
6 changes: 5 additions & 1 deletion tests/explicit.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

namespace fftwpp {

// Specialized mult routines for testing explicit convolutions:
typedef void Multiplier(Complex **, unsigned int m,
unsigned int threads);
typedef void Realmultiplier(double **, unsigned int m,
unsigned int threads);

Multiplier multbinary;
Realmultiplier multbinary;

class ExplicitPad : public ThreadBase {
Expand Down Expand Up @@ -222,7 +226,7 @@ class ExplicitConvolution : public ExplicitPad {
void forwards(Complex *f);

// F is an array of pointers to distinct data blocks each of size n.
void convolve(Complex **F, multiplier *mult);
void convolve(Complex **F, Multiplier *mult);

// Compute f (*) g. The distinct input arrays f and g are each of size n
// (contents not preserved). The output is returned in f.
Expand Down
18 changes: 17 additions & 1 deletion tests/hybridconv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@ unsigned int B=1; // number of outputs
unsigned int L=512; // input data length
unsigned int M=1024; // minimum padded length

// This multiplication routine is for binary convolutions and takes
// two Complex inputs of size n and outputs one Complex value.
// F0[j] *= F1[j];
void multbinaryNormalized(Complex **F, unsigned int offset, unsigned int n,
unsigned int threads)
{
Complex *F0=F[0]+offset;
Complex *F1=F[1]+offset;

double ninv=1.0/n;
PARALLEL(
for(unsigned int j=0; j < n; ++j)
F0[j] *= ninv*F1[j];
);
}

int main(int argc, char* argv[])
{
fftw::maxthreads=get_max_threads();
Expand Down Expand Up @@ -62,7 +78,7 @@ int main(int argc, char* argv[])
#endif
for(unsigned int k=0; k < K; ++k) {
seconds();
Convolve.convolve(f,multbinary);
Convolve.convolveRaw(f,multbinaryNormalized);
T[k]=seconds();
}

Expand Down
18 changes: 17 additions & 1 deletion tests/hybridconvh.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ unsigned int B=1; // number of outputs
unsigned int L=512; // input data length
unsigned int M=768; // minimum padded length

// This multiplication routine is for binary convolutions and takes
// two real inputs of size n.
// F0[j] *= F1[j];
void realmultbinaryNormalized(Complex **F, unsigned int offset, unsigned int n,
unsigned int threads)
{
double *F0=(double *) (F[0]+offset);
double *F1=(double *) (F[1]+offset);

double ninv=1.0/n;
PARALLEL(
for(unsigned int j=0; j < n; ++j)
F0[j] *= ninv*F1[j];
);
}

int main(int argc, char* argv[])
{
fftw::maxthreads=get_max_threads();
Expand Down Expand Up @@ -62,7 +78,7 @@ int main(int argc, char* argv[])
#endif
for(unsigned int k=0; k < K; ++k) {
seconds();
Convolve.convolve(f,realmultbinary);
Convolve.convolveRaw(f,realmultbinaryNormalized);
T[k]=seconds();
}

Expand Down
Loading

0 comments on commit 0feef43

Please sign in to comment.