From e1b527d72aad02ddea04f266f6831fb13768fbc3 Mon Sep 17 00:00:00 2001 From: athena Date: Sun, 21 Aug 2011 16:16:38 -0400 Subject: [PATCH] Release notes for 3.3.1-beta1 --- NEWS | 4 ++++ configure.ac | 2 +- doc/install.texi | 26 ++++++++++++++++++-------- doc/other.texi | 5 +++-- 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index 798a2a899..db86c0fa7 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,9 @@ FFTW 3.3.1 +* Added support for the NEON extensions to the ARM ISA. (Note to beta + users: an ARM cycle counter is not yet implemented; please contact + fftw@fftw.org if you know how to do it right.) + * MPI code now compiles even if mpicc is a C++ compiler; thanks to Kyle Spyksma for the bug report. diff --git a/configure.ac b/configure.ac index 54bbe8e77..eda8349f6 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(fftw, 3.3.1, fftw@fftw.org) +AC_INIT(fftw, 3.3.1-beta1, fftw@fftw.org) AC_CONFIG_SRCDIR(kernel/ifftw.h) # fftw-3.1.x was 4:X:1 # fftw-3.2.x was 5:X:2 diff --git a/doc/install.texi b/doc/install.texi index cb70f8418..e7fd9dd29 100644 --- a/doc/install.texi +++ b/doc/install.texi @@ -190,14 +190,14 @@ of the time). @xref{Cycle Counters}. @item @code{--enable-sse}, @code{--enable-sse2}, @code{--enable-avx}, -@code{--enable-altivec}: Enable the compilation of SIMD code for SSE -(Pentium III+), SSE2 (Pentium IV+), AVX (Sandy Bridge, Interlagos), -AltiVec (PowerPC G4+). SSE and AltiVec only work with -@code{--enable-float} (above). SSE2 works in both single and double -precision (and is simply SSE in single precision). The resulting code -will @emph{still work} on earlier CPUs lacking the SIMD extensions -(SIMD is automatically disabled, although the FFTW library is still -larger). 
+@code{--enable-altivec}, @code{--enable-neon}: Enable the compilation of +SIMD code for SSE (Pentium III+), SSE2 (Pentium IV+), AVX (Sandy Bridge, +Interlagos), AltiVec (PowerPC G4+), NEON (some ARM processors). SSE, +AltiVec, and NEON only work with @code{--enable-float} (above). SSE2 +works in both single and double precision (and is simply SSE in single +precision). The resulting code will @emph{still work} on earlier CPUs +lacking the SIMD extensions (SIMD is automatically disabled, although +the FFTW library is still larger). @itemize @minus @item These options require a compiler supporting SIMD extensions, and @@ -214,6 +214,16 @@ properly aligns the stack when compiling any code that links to FFTW. By default, @code{gcc} 2.95 and later versions align the stack as needed, but you should not compile FFTW with the @code{-Os} option or the @code{-mpreferred-stack-boundary} option with an argument less than 4. +@item +Because of the large variety of ARM processors and ABIs, FFTW +does not attempt to guess the correct @code{gcc} flags for generating +NEON code. In general, you will have to provide them on the command line. +This command line is known to have worked at least once: +@example +./configure --with-slow-timer --host=arm-linux-gnueabi \ + --enable-single --enable-neon \ + "CC=arm-linux-gnueabi-gcc -march=armv7-a -mfloat-abi=softfp" +@end example @end itemize @end itemize diff --git a/doc/other.texi b/doc/other.texi index bda25dde3..55c6f7555 100644 --- a/doc/other.texi +++ b/doc/other.texi @@ -15,8 +15,9 @@ SIMD, which stands for ``Single Instruction Multiple Data,'' is a set of special operations supported by some processors to perform a single operation on several numbers (usually 2 or 4) simultaneously. SIMD floating-point instructions are available on several popular CPUs: -SSE/SSE2/AVX on recent x86/x86-64 processors, AltiVec (single precision) on some PowerPCs (Apple G4 and -higher), and MIPS Paired Single (currently only in FFTW 3.2.x). 
FFTW can be compiled to support the +SSE/SSE2/AVX on recent x86/x86-64 processors, AltiVec (single precision) +on some PowerPCs (Apple G4 and higher), NEON (single precision) on some ARM models, and MIPS Paired Single +(currently only in FFTW 3.2.x). FFTW can be compiled to support the SIMD instructions on any of these systems. @cindex SIMD @cindex SSE