Skip to content

Commit

Permalink
Avoid transforming uninitialized data.
Browse files Browse the repository at this point in the history
In r2c/c2r transforms when using 4-way SIMD, sometimes FFTW uses the
following hack: to transform an odd number of inputs, it copies the
input into a buffer that holds space for one extra input; it
transforms the buffer (now comprising an even number of inputs, as
required by SIMD); it copies back the odd number of transformed
inputs, ignoring the padding element.

The extra input was uninitialized until now.  As far as results go,
this is ok because we ignore the transform of the uninitialized input.
However, transforming uninitialized data may cause floating-point
exceptions, an effect that is observable.  This patch initializes the
additional elements to zero, thus avoiding the problem.

This patch also includes a test, but the test is disabled by default
because it is nonportable.  To observe the FP exception, one must use
feenableexcept(), which appears to be a GNU-ism.
  • Loading branch information
matteo-frigo committed Jul 27, 2014
1 parent 2493129 commit cde4559
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 5 deletions.
9 changes: 9 additions & 0 deletions kernel/cpy2d-pair.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ void X(cpy2d_pair)(R *I0, R *I1, R *O0, R *O1,
}
}

/*
 * Store zero into n0 elements of each of the two output arrays O0 and
 * O1.  Element k of either array lives at index k * os0, i.e. both
 * arrays are walked with the same stride os0.
 */
void X(zero1d_pair)(R *O0, R *O1, INT n0, INT os0)
{
     INT k = 0;

     while (k < n0) {
          const INT off = k * os0;

          O0[off] = 0;
          O1[off] = 0;
          ++k;
     }
}

/* like cpy2d_pair, but read input contiguously if possible */
void X(cpy2d_pair_ci)(R *I0, R *I1, R *O0, R *O1,
INT n0, INT is0, INT os0,
Expand Down
1 change: 1 addition & 0 deletions kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,7 @@ void X(tile2d)(INT n0l, INT n0u, INT n1l, INT n1u, INT tilesz,
void (*f)(INT n0l, INT n0u, INT n1l, INT n1u, void *args),
void *args);
void X(cpy1d)(R *I, R *O, INT n0, INT is0, INT os0, INT vl);
void X(zero1d_pair)(R *O0, R *O1, INT n0, INT os0);
void X(cpy2d)(R *I, R *O,
INT n0, INT is0, INT os0,
INT n1, INT is1, INT os1,
Expand Down
22 changes: 17 additions & 5 deletions rdft/ct-hc2c-direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,33 @@ static void dobatch(const P *ego, R *Rp, R *Ip, R *Rm, R *Im,
INT rs = WS(ego->rs, 1);
INT ms = ego->ms;
R *bufm = bufp + b - 2;
INT n = me - mb;

X(cpy2d_pair_ci)(Rp + mb * ms, Ip + mb * ms, bufp, bufp + 1,
ego->r / 2, rs, b,
me - mb, ms, 2);
n, ms, 2);
X(cpy2d_pair_ci)(Rm - mb * ms, Im - mb * ms, bufm, bufm + 1,
ego->r / 2, rs, b,
me - mb, -ms, -2);
n, -ms, -2);

if (extra_iter) {
/* initialize the extra_iter element to 0. It would be ok
to leave it uninitialized, since we transform uninitialized
data and ignore the result. However, we want to avoid
FP exceptions in case somebody is trapping them. */
A(n < compute_batchsize(ego->r));
X(zero1d_pair)(bufp + 2*n, bufp + 1 + 2*n, ego->r / 2, b);
X(zero1d_pair)(bufm - 2*n, bufm + 1 - 2*n, ego->r / 2, b);
}

ego->k(bufp, bufp + 1, bufm, bufm + 1, ego->td->W,
ego->brs, mb, me + extra_iter, 2);
X(cpy2d_pair_co)(bufp, bufp + 1, Rp + mb * ms, Ip + mb * ms,
ego->r / 2, b, rs,
me - mb, 2, ms);
n, 2, ms);
X(cpy2d_pair_co)(bufm, bufm + 1, Rm - mb * ms, Im - mb * ms,
ego->r / 2, b, rs,
me - mb, -2, -ms);
n, -2, -ms);
}

static void apply_buf(const plan *ego_, R *cr, R *ci)
Expand Down Expand Up @@ -222,7 +234,7 @@ static int applicable0(const S *ego, rdft_kind kind,
&& ((*extra_iter = 0,
e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms,
rs, 1, (m+1)/2, ms, plnr))
||
||
(*extra_iter = 1,
((e->genus->okp(cr + ms, ci + ms, cr + (m-1)*ms, ci + (m-1)*ms,
rs, 1, (m-1)/2, ms, plnr))
Expand Down
40 changes: 40 additions & 0 deletions tests/fftw-bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@
#include <string.h>
#include "fftw-bench.h"

/* define to enable code that traps floating-point exceptions.
Disabled by default because I don't want to worry about the
portability of such code. feenableexcept() seems to be a GNU
thing */
#undef TRAP_FP_EXCEPTIONS

#ifdef TRAP_FP_EXCEPTIONS
# include <signal.h>
# include <fenv.h>
# include <unistd.h>
#endif

#ifdef _OPENMP
# include <omp.h>
#endif
Expand All @@ -31,6 +42,33 @@ extern void uninstall_hook(void); /* in hook.c */
extern unsigned FFTW(random_estimate_seed);
#endif

#ifdef TRAP_FP_EXCEPTIONS
/*
 * SIGFPE handler used when TRAP_FP_EXCEPTIONS is enabled.  FFTW code
 * is not supposed to generate FP exceptions, so report the event on
 * stderr and abort.
 *
 * The three arguments are the ones passed to an SA_SIGINFO-style
 * handler; none of them is used.
 */
static void sigfpe_handler(int sig, siginfo_t *info, void *context)
{
     /* stdio (fprintf) is not async-signal-safe, so emit the message
        with write(), which is; abort() is async-signal-safe too. */
     static const char msg[] = "caught FPE, aborting\n";

     UNUSED(sig); UNUSED(info); UNUSED(context);

     if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
          /* nothing useful can be done here; we abort regardless */
     }
     abort();
}

static void setup_sigfpe_handler(void)
{
struct sigaction a;
feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW);
memset(&a, 0, sizeof(a));
a.sa_sigaction = sigfpe_handler;
a.sa_flags = SA_SIGINFO;
if (sigaction(SIGFPE, &a, NULL) == -1) {
fprintf(stderr, "cannot install sigfpe handler\n");
exit(1);
}
}
#else
/*
 * Stub used when TRAP_FP_EXCEPTIONS is not defined: FP exceptions
 * are not trapped, so there is nothing to set up.
 */
static void setup_sigfpe_handler(void)
{
     /* intentionally empty */
}
#endif

void useropt(const char *arg)
{
int x;
Expand Down Expand Up @@ -162,6 +200,8 @@ void setup(bench_problem *p)
{
double tim;

setup_sigfpe_handler();

if (amnesia) {
FFTW(forget_wisdom)();
havewisdom = 0;
Expand Down

0 comments on commit cde4559

Please sign in to comment.