Skip to content

Commit

Permalink
128-bit AVX2 SIMD support
Browse files Browse the repository at this point in the history
Add 128-bit support for AVX2. Similar to AVX-128, this
improves slightly on SSE2 due to more efficient instructions,
and the shorter SIMD width is beneficial in some cases. Both
128- and 256-bit flavors will be built automatically with
--enable-avx2, and the timing routines will choose the best one
automatically.
  • Loading branch information
Erik Lindahl committed Mar 28, 2015
1 parent da988fa commit de81bfd
Show file tree
Hide file tree
Showing 16 changed files with 395 additions and 43 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ rdft/simd/altivec/*.c
rdft/simd/avx/*.c
rdft/simd/avx-128/*.c
rdft/simd/avx2/*.c
rdft/simd/avx2-128/*.c
rdft/simd/common/*.c
rdft/simd/kcvi/*.c
rdft/simd/neon/*.c
Expand All @@ -30,6 +31,7 @@ dft/simd/altivec/*.c
dft/simd/avx/*.c
dft/simd/avx-128/*.c
dft/simd/avx2/*.c
dft/simd/avx2-128/*.c
dft/simd/common/*.c
dft/simd/kcvi/*.c
dft/simd/neon/*.c
Expand Down
4 changes: 3 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ endif

if HAVE_AVX2
AVX2_LIBS = dft/simd/avx2/libdft_avx2_codelets.la \
rdft/simd/avx2/librdft_avx2_codelets.la
dft/simd/avx2-128/libdft_avx2_128_codelets.la \
rdft/simd/avx2/librdft_avx2_codelets.la \
rdft/simd/avx2-128/librdft_avx2_128_codelets.la
endif

if HAVE_KCVI
Expand Down
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ AC_CONFIG_FILES([
dft/simd/avx/Makefile
dft/simd/avx-128/Makefile
dft/simd/avx2/Makefile
dft/simd/avx2-128/Makefile
dft/simd/kcvi/Makefile
dft/simd/altivec/Makefile
dft/simd/neon/Makefile
Expand All @@ -630,6 +631,7 @@ AC_CONFIG_FILES([
rdft/simd/avx/Makefile
rdft/simd/avx-128/Makefile
rdft/simd/avx2/Makefile
rdft/simd/avx2-128/Makefile
rdft/simd/kcvi/Makefile
rdft/simd/altivec/Makefile
rdft/simd/neon/Makefile
Expand Down
1 change: 1 addition & 0 deletions dft/codelet-dft.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ extern const solvtab X(solvtab_dft_sse2);
extern const solvtab X(solvtab_dft_avx);
extern const solvtab X(solvtab_dft_avx_128);
extern const solvtab X(solvtab_dft_avx2);
extern const solvtab X(solvtab_dft_avx2_128);
extern const solvtab X(solvtab_dft_kcvi);
extern const solvtab X(solvtab_dft_altivec);
extern const solvtab X(solvtab_dft_neon);
Expand Down
5 changes: 4 additions & 1 deletion dft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@ void X(dft_conf_standard)(planner *p)
#endif
#if HAVE_AVX2
if (X(have_simd_avx2)())
X(solvtab_exec)(X(solvtab_dft_avx2), p);
{
X(solvtab_exec)(X(solvtab_dft_avx2), p);
X(solvtab_exec)(X(solvtab_dft_avx2_128), p);
}
#endif
#if HAVE_KCVI
if (X(have_simd_kcvi)())
Expand Down
2 changes: 1 addition & 1 deletion dft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SUBDIRS = common sse2 avx avx-128 avx2 kcvi altivec neon
SUBDIRS = common sse2 avx avx-128 avx2 avx2-128 kcvi altivec neon
EXTRA_DIST = n1b.h n1f.h n2b.h n2f.h n2s.h q1b.h q1f.h t1b.h t1bu.h \
t1f.h t1fu.h t2b.h t2f.h t3b.h t3f.h ts.h codlist.mk simd.mk
13 changes: 13 additions & 0 deletions dft/simd/avx2-128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
AM_CFLAGS = $(AVX2_CFLAGS)
SIMD_HEADER=simd-avx2-128.h

include $(top_srcdir)/dft/simd/codlist.mk
include $(top_srcdir)/dft/simd/simd.mk

if HAVE_AVX2

BUILT_SOURCES = $(EXTRA_DIST)
noinst_LTLIBRARIES = libdft_avx2_128_codelets.la
libdft_avx2_128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif
7 changes: 5 additions & 2 deletions kernel/ifftw.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,11 @@ extern void X(extract_reim)(int sign, R *c, R **r, R **i);
#define CIMPLIES(ante, post) (!(ante) || (post))

/* define HAVE_SIMD if any simd extensions are supported */
#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || \
defined(HAVE_MIPS_PS) || defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_KCVI)
#if defined(HAVE_SSE) || defined(HAVE_SSE2) || \
defined(HAVE_AVX) || defined(HAVE_AVX2) || \
defined(HAVE_KCVI) || \
defined(HAVE_ALTIVEC) || \
defined(HAVE_MIPS_PS)
#define HAVE_SIMD 1
#else
#define HAVE_SIMD 0
Expand Down
1 change: 1 addition & 0 deletions rdft/codelet-rdft.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ extern const solvtab X(solvtab_rdft_sse2);
extern const solvtab X(solvtab_rdft_avx);
extern const solvtab X(solvtab_rdft_avx_128);
extern const solvtab X(solvtab_rdft_avx2);
extern const solvtab X(solvtab_rdft_avx2_128);
extern const solvtab X(solvtab_rdft_kcvi);
extern const solvtab X(solvtab_rdft_altivec);
extern const solvtab X(solvtab_rdft_neon);
Expand Down
5 changes: 4 additions & 1 deletion rdft/conf.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@ void X(rdft_conf_standard)(planner *p)
#endif
#if HAVE_AVX2
if (X(have_simd_avx2)())
X(solvtab_exec)(X(solvtab_rdft_avx2), p);
{
X(solvtab_exec)(X(solvtab_rdft_avx2), p);
X(solvtab_exec)(X(solvtab_rdft_avx2_128), p);
}
#endif
#if HAVE_KCVI
if (X(have_simd_kcvi)())
Expand Down
2 changes: 1 addition & 1 deletion rdft/simd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
SUBDIRS = common sse2 avx avx-128 avx2 kcvi altivec neon
SUBDIRS = common sse2 avx avx-128 avx2 avx2-128 kcvi altivec neon
EXTRA_DIST = hc2cbv.h hc2cfv.h codlist.mk simd.mk
15 changes: 15 additions & 0 deletions rdft/simd/avx2-128/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
AM_CFLAGS = $(AVX2_CFLAGS)
SIMD_HEADER=simd-avx2-128.h

include $(top_srcdir)/rdft/simd/codlist.mk
include $(top_srcdir)/rdft/simd/simd.mk

if HAVE_AVX2

noinst_LTLIBRARIES = librdft_avx2_128_codelets.la
BUILT_SOURCES = $(EXTRA_DIST)
librdft_avx2_128_codelets_la_SOURCES = $(BUILT_SOURCES)

endif


4 changes: 2 additions & 2 deletions simd-support/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ libsimd_support_la_SOURCES = taint.c simd-common.h \
x86-cpuid.h amd64-cpuid.h \
simd-sse2.h sse2.c \
avx.c simd-avx.h simd-avx-128.h \
avx2.c simd-avx2.h \
avx2.c simd-avx2.h simd-avx2-128.h \
kcvi.c simd-kcvi.h \
altivec.c simd-altivec.h \
altivec.c simd-altivec.h \
neon.c simd-neon.h

# sse2-nonportable.c needs SSE2_CFLAGS, but Automake does not support
Expand Down
4 changes: 2 additions & 2 deletions simd-support/avx2.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2003, 2007-11 Matteo Frigo
* Copyright (c) 2003, 2007-11 Massachusetts Institute of Technology
* Copyright (c) 2003, 2007-14 Matteo Frigo
* Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down
Loading

0 comments on commit de81bfd

Please sign in to comment.