Skip to content

Commit

Permalink
Heuristic: do not use t2 simd codelets for N>1024.
Browse files Browse the repository at this point in the history
  • Loading branch information
matteo-frigo committed Sep 11, 2005
1 parent 8c4b74a commit c5134ff
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 92 deletions.
4 changes: 2 additions & 2 deletions dft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
AM_CPPFLAGS = -I$(top_srcdir)/kernel -I$(top_srcdir)/dft -I$(top_srcdir)/simd
noinst_LTLIBRARIES = libdft_simd.la
SUBDIRS = codelets
libdft_simd_la_SOURCES = n1b.c n1f.c n2b.c n2f.c q1b.c q1f.c t1b.c \
t1f.c n1b.h n1f.h n2b.h n2f.h q1b.h q1f.h t1b.h t1f.h t2b.h t2f.h
libdft_simd_la_SOURCES = n1b.c n1f.c n2b.c n2f.c q1b.c q1f.c t.c n1b.h \
n1f.h n2b.h n2f.h q1b.h q1f.h t1b.h t1f.h t2b.h t2f.h
98 changes: 98 additions & 0 deletions dft/simd/t.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright (c) 2003 Matteo Frigo
* Copyright (c) 2003 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/

#include "codelet-dft.h"

#if HAVE_SIMD

#include "simd.h"

/*
 * Applicability test shared by the SIMD twiddle genera defined in this
 * file.  Returns 1 when every condition below holds and 0 otherwise.
 * rio and iio are not examined here; the per-genus tests check them.
 */
static int okp_common(const ct_desc *d,
                      const R *rio, const R *iio,
                      int ios, int vs, int m, int dist,
                      const planner *plnr)
{
    UNUSED(rio);
    UNUSED(iio);

    /* RIGHT_CPU() must hold and the planner must not have NO_SIMDP set */
    if (!(RIGHT_CPU() && !NO_SIMDP(plnr)))
        return 0;

    /* ios and dist must pass the SIMD stride tests */
    if (!(SIMD_STRIDE_OKA(ios) && SIMD_VSTRIDE_OKA(dist)))
        return 0;

    /* m must be a multiple of VL */
    if ((m % VL) != 0)
        return 0;

    /* a nonzero s1, s2 or dist in the descriptor must equal the
       corresponding actual value; zero means "no constraint" */
    if (d->s1 && d->s1 != ios)
        return 0;
    if (d->s2 && d->s2 != vs)
        return 0;
    if (d->dist && d->dist != dist)
        return 0;

    return 1;
}

/*
 * Applicability test for the t1f SIMD genus: the common conditions must
 * hold, iio must be exactly rio + 1, and rio must satisfy ALIGNEDA.
 * Returns 1 if applicable, 0 otherwise.
 */
static int okp_t1f(const ct_desc *d,
                   const R *rio, const R *iio,
                   int ios, int vs, int m, int dist,
                   const planner *plnr)
{
    if (!okp_common(d, rio, iio, ios, vs, m, dist, plnr))
        return 0;

    return (iio == rio + 1 && ALIGNEDA(rio));
}

/* genus whose applicability test is okp_t1f; the second field is VL */
const ct_genus X(dft_t1fsimd_genus) = { okp_t1f, VL };

/*
 * Applicability test for the t1b SIMD genus: the common conditions must
 * hold, rio must be exactly iio + 1, and iio must satisfy ALIGNEDA.
 * Returns 1 if applicable, 0 otherwise.
 */
static int okp_t1b(const ct_desc *d,
                   const R *rio, const R *iio,
                   int ios, int vs, int m, int dist,
                   const planner *plnr)
{
    if (!okp_common(d, rio, iio, ios, vs, m, dist, plnr))
        return 0;

    return (rio == iio + 1 && ALIGNEDA(iio));
}

/* genus whose applicability test is okp_t1b; the second field is VL */
const ct_genus X(dft_t1bsimd_genus) = { okp_t1b, VL };

/*
 * Size heuristic for the t2* codelets.  They use ~2n twiddle factors
 * (instead of ~n), so they are allowed only while n = m * radix does not
 * exceed 1024.  Returns 1 when n is within the limit, 0 otherwise.
 */
static int small_enough(const ct_desc *d, int m)
{
    const int max_n = 1024;
    const int n = m * d->radix;

    return n <= max_n;
}

/*
 * Applicability test for the t2f SIMD genus: everything okp_t1f requires,
 * plus the small_enough() size limit.  Returns 1 if applicable, 0
 * otherwise.
 */
static int okp_t2f(const ct_desc *d,
                   const R *rio, const R *iio,
                   int ios, int vs, int m, int dist,
                   const planner *plnr)
{
    if (!okp_t1f(d, rio, iio, ios, vs, m, dist, plnr))
        return 0;

    return small_enough(d, m) != 0;
}

/* genus whose applicability test is okp_t2f; the second field is VL */
const ct_genus X(dft_t2fsimd_genus) = { okp_t2f, VL };

/*
 * Applicability test for the t2b SIMD genus: everything okp_t1b requires,
 * plus the small_enough() size limit.  Returns 1 if applicable, 0
 * otherwise.
 */
static int okp_t2b(const ct_desc *d,
                   const R *rio, const R *iio,
                   int ios, int vs, int m, int dist,
                   const planner *plnr)
{
    if (!okp_t1b(d, rio, iio, ios, vs, m, dist, plnr))
        return 0;

    return small_enough(d, m) != 0;
}

/* genus whose applicability test is okp_t2b; the second field is VL */
const ct_genus X(dft_t2bsimd_genus) = { okp_t2b, VL };

#endif /* HAVE_SIMD */
43 changes: 0 additions & 43 deletions dft/simd/t1b.c

This file was deleted.

43 changes: 0 additions & 43 deletions dft/simd/t1f.c

This file was deleted.

3 changes: 1 addition & 2 deletions dft/simd/t2b.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#define BYTW BYTW2
#define BYTWJ BYTWJ2

/* genus is the same as t1b */
#define GENUS X(dft_t1bsimd_genus)
#define GENUS X(dft_t2bsimd_genus)
extern const ct_genus GENUS;

3 changes: 1 addition & 2 deletions dft/simd/t2f.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#define BYTW BYTW2
#define BYTWJ BYTWJ2

/* genus is the same as t1f */
#define GENUS X(dft_t1fsimd_genus)
#define GENUS X(dft_t2fsimd_genus)
extern const ct_genus GENUS;

0 comments on commit c5134ff

Please sign in to comment.