Skip to content

Commit

Permalink
Add VZEROUPPER at the end of AVX codelets
Browse files Browse the repository at this point in the history
If the Intel Optimization Manual is to be believed, we need to wave a
dead chicken before transitioning from AVX code to SSE code.  I am
supposed to believe that there is a transition penalty for doing so,
unless one uses a magic VZEROUPPER instruction that apparently has
zero cost.  Whatever.
  • Loading branch information
matteo-frigo committed Jun 20, 2011
1 parent 1b26ff6 commit 1ed535e
Show file tree
Hide file tree
Showing 20 changed files with 27 additions and 16 deletions.
2 changes: 2 additions & 0 deletions genfft/c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type c_decl =

and c_ast =
| Asch of annotated_schedule
| Simd_leavefun
| Return of c_ast
| For of c_ast * c_ast * c_ast * c_ast
| If of c_ast * c_ast
Expand Down Expand Up @@ -204,6 +205,7 @@ and unparse_ast =
in
function
| Asch a -> (unparse_annotated true a)
| Simd_leavefun -> "" (* used only in SIMD code *)
| Return x -> "return " ^ unparse_ast x ^ ";"
| For (a, b, c, d) ->
"for (" ^
Expand Down
1 change: 1 addition & 0 deletions genfft/c.mli
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type c_decl =

and c_ast =
| Asch of Annotate.annotated_schedule
| Simd_leavefun
| Return of c_ast
| For of c_ast * c_ast * c_ast * c_ast
| If of c_ast * c_ast
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_hc2c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_hc2cdft.ml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_hc2cdft_c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_hc2hc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_mdct.ml
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ let generate n mode =
@ (if (not (window_param mode)) then []
else [Decl (C.constrealtypep, window)])
),
add_constants (Asch annot))
finalize_fcn (Asch annot))

in
(unparse tree) ^ "\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_notw.ml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ let generate n =
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
add_constants body)
finalize_fcn body)

in let desc =
Printf.sprintf
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_notw_c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ let generate n =
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
add_constants body)
finalize_fcn body)

in
let desc =
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_r2cb.ml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ let generate n =
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
add_constants body)
finalize_fcn body)

in let desc =
Printf.sprintf
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_r2cf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ let generate n =
Decl ("INT", v);
Decl ("INT", "ivs");
Decl ("INT", "ovs")]),
add_constants body)
finalize_fcn body)

in let desc =
Printf.sprintf
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_r2r.ml
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ let generate n mode =
else [Decl ("INT", "ivs")])
@ (if stride_fixed !uovstride then []
else [Decl ("INT", "ovs")]))),
add_constants body)
finalize_fcn body)

in let desc =
Printf.sprintf
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_twiddle.ml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_twiddle_c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_twidsq.ml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
2 changes: 1 addition & 1 deletion genfft/gen_twidsq_c.ml
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ let generate n =
Decl ("INT", mb);
Decl ("INT", me);
Decl ("INT", ms)],
add_constants body)
finalize_fcn body)
in
let twinstr =
Printf.sprintf "static const tw_instr twinstr[] = %s;\n\n"
Expand Down
4 changes: 2 additions & 2 deletions genfft/genutil.ml
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ let unparse tree =
else
C.unparse_function tree)

let add_constants ast =
let finalize_fcn ast =
let mergedecls = function
C.Block (d1, [C.Block (d2, s)]) -> C.Block (d1 @ d2, s)
| x -> x
Expand All @@ -316,7 +316,7 @@ let add_constants ast =
else
C.extract_constants

in mergedecls (C.Block (extract_constants ast, [ast]))
in mergedecls (C.Block (extract_constants ast, [ast; C.Simd_leavefun]))

let twinstr_to_string vl x =
if !Simdmagic.simd_mode then
Expand Down
1 change: 1 addition & 0 deletions genfft/simd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ and unparse_ast ast =
in match ast with
| Asch a -> (unparse_annotated true a)
| Return x -> "return " ^ unparse_ast x ^ ";"
| Simd_leavefun -> "VLEAVE();"
| For (a, b, c, d) ->
"for (" ^
unparse_ast a ^ "; " ^ unparse_ast b ^ "; " ^ unparse_ast c
Expand Down
5 changes: 5 additions & 0 deletions simd-support/simd-avx256d.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,4 +223,9 @@ static inline V BYTWJ2(const R *t, V sr)
#define VFMSCONJ(b,c) VSUB(VCONJ(b),c)
#define VFNMSCONJ(b,c) VSUB(c, VCONJ(b))

/* User VZEROUPPER to avoid the penalty of switching from AVX to
SSE. See Intel Optimization Manual (April 2011, version 248966),
Section 11.3 */
#define VLEAVE _mm256_zeroupper

#include "simd-common.h"
2 changes: 2 additions & 0 deletions simd-support/simd-sse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,6 @@ static inline V BYTWJ2(const R *t, V sr)
#define VFMSCONJ(b,c) VSUB(VCONJ(b),c)
#define VFNMSCONJ(b,c) VSUB(c, VCONJ(b))

#define VLEAVE() /* nothing */

#include "simd-common.h"

0 comments on commit 1ed535e

Please sign in to comment.