Skip to content

Commit

Permalink
Improved AVX SIMD
Browse files Browse the repository at this point in the history
Previously, some kernels were actually faster with the old SSE2
SIMD, which made it necessary to compile with both sse2 and avx
for good performance. This commit adds 128-bit AVX kernels, which are
enabled together with the standard AVX kernels. Apart from
being encoded with AVX rather than SSE instructions
(depending on compiler flags), they also use a couple of new
instructions only available with AVX that use fewer micro-ops.
These instructions have also been added to the 256-bit AVX SIMD
implementation. No new configure flags are needed; it is just faster.
  • Loading branch information
Erik Lindahl committed Mar 25, 2015
1 parent 131027a commit b606e31
Show file tree
Hide file tree
Showing 14 changed files with 393 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ _build
# generated codelets:
rdft/simd/altivec/*.c
rdft/simd/avx/*.c
rdft/simd/avx-128/*.c
rdft/simd/common/*.c
rdft/simd/neon/*.c
rdft/simd/sse2/*.c
Expand All @@ -25,6 +26,7 @@ rdft/scalar/r2r/*.c
dft/scalar/codelets/*.c
dft/simd/altivec/*.c
dft/simd/avx/*.c
dft/simd/avx-128/*.c
dft/simd/common/*.c
dft/simd/neon/*.c
dft/simd/sse2/*.c
Expand Down
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ endif

if HAVE_AVX
AVX_LIBS = dft/simd/avx/libdft_avx_codelets.la \
rdft/simd/avx/librdft_avx_codelets.la
dft/simd/avx-128/libdft_avx_128_codelets.la \
rdft/simd/avx/librdft_avx_codelets.la \
rdft/simd/avx-128/librdft_avx_128_codelets.la
endif

if HAVE_ALTIVEC
Expand Down
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ AC_CONFIG_FILES([
dft/simd/common/Makefile
dft/simd/sse2/Makefile
dft/simd/avx/Makefile
dft/simd/avx-128/Makefile
dft/simd/altivec/Makefile
dft/simd/neon/Makefile
Expand All @@ -588,6 +589,7 @@ AC_CONFIG_FILES([
rdft/simd/common/Makefile
rdft/simd/sse2/Makefile
rdft/simd/avx/Makefile
rdft/simd/avx-128/Makefile
rdft/simd/altivec/Makefile
rdft/simd/neon/Makefile
Expand Down
1 change: 1 addition & 0 deletions dft/codelet-dft.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ void X(kdft_difsq_register)(planner *p, kdftwsq codelet, const ct_desc *desc);
extern const solvtab X(solvtab_dft_standard);
extern const solvtab X(solvtab_dft_sse2);
extern const solvtab X(solvtab_dft_avx);
extern const solvtab X(solvtab_dft_avx_128);
extern const solvtab X(solvtab_dft_altivec);
extern const solvtab X(solvtab_dft_neon);

Expand Down
5 changes: 4 additions & 1 deletion dft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ void X(dft_conf_standard)(planner *p)
#endif
#if HAVE_AVX
if (X(have_simd_avx)())
X(solvtab_exec)(X(solvtab_dft_avx), p);
{
X(solvtab_exec)(X(solvtab_dft_avx), p);
X(solvtab_exec)(X(solvtab_dft_avx_128), p);
}
#endif
#if HAVE_ALTIVEC
if (X(have_simd_altivec)())
Expand Down
2 changes: 1 addition & 1 deletion dft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SUBDIRS = common sse2 avx altivec neon
SUBDIRS = common sse2 avx avx-128 altivec neon
EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \
t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions dft/simd/avx-128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Automake input for the 128-bit AVX DFT codelet library.
# Everything in this directory is compiled with the AVX compiler flags.
AM_CFLAGS = $(AVX_CFLAGS)
# Name of the SIMD header for this directory; presumably consumed by the
# included simd.mk when producing the codelet sources -- confirm there.
SIMD_HEADER=simd-avx-128.h

# Shared fragments. EXTRA_DIST, used below, is not set in this file, so it
# is presumably defined by one of these includes -- verify.
include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

# The library is only declared when the HAVE_AVX conditional is true.
if HAVE_AVX

# Listing the sources as BUILT_SOURCES asks make to create them before the
# regular build targets.
BUILT_SOURCES = $(EXTRA_DIST)
# noinst_: libtool convenience library, not installed on its own.
noinst_LTLIBRARIES = libdft_avx_128_codelets.la
libdft_avx_128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
1 change: 1 addition & 0 deletions rdft/codelet-rdft.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ extern const solvtab X(solvtab_rdft_r2cf);
extern const solvtab X(solvtab_rdft_r2cb);
extern const solvtab X(solvtab_rdft_sse2);
extern const solvtab X(solvtab_rdft_avx);
extern const solvtab X(solvtab_rdft_avx_128);
extern const solvtab X(solvtab_rdft_altivec);
extern const solvtab X(solvtab_rdft_neon);

Expand Down
3 changes: 3 additions & 0 deletions rdft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ void X(rdft_conf_standard)(planner *p)
#endif
#if HAVE_AVX
if (X(have_simd_avx)())
{
X(solvtab_exec)(X(solvtab_rdft_avx), p);
X(solvtab_exec)(X(solvtab_rdft_avx_128), p);
}
#endif
#if HAVE_ALTIVEC
if (X(have_simd_altivec)())
Expand Down
2 changes: 1 addition & 1 deletion rdft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SUBDIRS = common sse2 avx altivec neon
SUBDIRS = common sse2 avx avx-128 altivec neon
EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk
15 changes: 15 additions & 0 deletions rdft/simd/avx-128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Automake input for the 128-bit AVX RDFT codelet library.
# Everything in this directory is compiled with the AVX compiler flags.
AM_CFLAGS = $(AVX_CFLAGS)
# Name of the SIMD header for this directory; presumably consumed by the
# included simd.mk when producing the codelet sources -- confirm there.
SIMD_HEADER=simd-avx-128.h

# Shared fragments. EXTRA_DIST, used below, is not set in this file, so it
# is presumably defined by one of these includes -- verify.
include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

# The library is only declared when the HAVE_AVX conditional is true.
if HAVE_AVX

# noinst_: libtool convenience library, not installed on its own.
noinst_LTLIBRARIES = librdft_avx_128_codelets.la
# Listing the sources as BUILT_SOURCES asks make to create them before the
# regular build targets.
BUILT_SOURCES = $(EXTRA_DIST)
librdft_avx_128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif


7 changes: 5 additions & 2 deletions simd-support/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
AM_CPPFLAGS = -I$(top_srcdir)/kernel
noinst_LTLIBRARIES = libsimd_support.la libsimd_sse2_nonportable.la

libsimd_support_la_SOURCES = taint.c simd-common.h simd-sse2.h sse2.c \
x86-cpuid.h amd64-cpuid.h avx.c simd-avx.h altivec.c simd-altivec.h \
libsimd_support_la_SOURCES = taint.c simd-common.h \
x86-cpuid.h amd64-cpuid.h \
simd-sse2.h sse2.c \
avx.c simd-avx.h simd-avx-128.h \
altivec.c simd-altivec.h \
neon.c simd-neon.h

# sse2-nonportable.c needs SSE2_CFLAGS, but Automake does not support
Expand Down
Loading

0 comments on commit b606e31

Please sign in to comment.