forked from FFTW/fftw3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvAnnotatedScheduler.ml
182 lines (157 loc) · 5.84 KB
/
vAnnotatedScheduler.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
(*
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
* Copyright (c) 2000-2001 Stefan Kral
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*)
(* Here, we take a schedule (produced by Vschedule) ordering a
sequence of instructions, and produce an annotated schedule. The
annotated schedule has the same ordering as the original schedule,
but is additionally partitioned into nested blocks of temporary
variables. The partitioning is computed via a heuristic algorithm.
The blocking allows the C code that we generate to consist of
nested blocks that help communicate variable lifetimes to the
compiler. *)
(* In the standard distribution of FFTW 2.1.3 there is a module called
'Asched'. This module ('VAnnotatedScheduler') only slightly differs
from Asched. It has been adapted to deal with structures defined in
module VSimdBasics. *)
open Util
open VScheduler
open VSimdBasics
open VSimdInstrOperandSet
type vannotated_schedule =
VAnnotate of
VSimdInstrOperandSet.t *
VSimdInstrOperandSet.t *
VSimdInstrOperandSet.t *
int *
vaschedule
and vaschedule =
| VADone
| VAInstr of vsimdinstr
| VASeq of vannotated_schedule * vannotated_schedule
(* find all variables used inside a scheduling unit *)
let find_block_vars =
let rec find_block_vars' set0 = function
| VDone -> set0
| VInstr i -> List.fold_right add (vsimdinstrToSrcoperands i) set0
| VPar ps -> List.fold_left find_block_vars' set0 ps
| VSeq(a,b) -> find_block_vars' (find_block_vars' set0 a) b
in find_block_vars' empty
let overlap xs zs = cardinal (inter xs zs)
(* reorder a list of schedules so as to maximize overlap of variables *)
let reorder l =
let rec loop = function
| [] -> []
| (a,va) :: b ->
let c = List.map (fun (a, x) -> ((a, x), overlap va x)) b in
let c' = Sort.list (fun (_, a) (_, b) -> a > b) c in
let b' = List.map fst c' in
a :: (loop b') in
let l' = List.map (fun x -> x, find_block_vars x) l in
(* start with smallest block --- does this matter ? *)
match l' with
| [] -> []
| _ ->
let m = optionToValue (minimize (fun (_, x) -> cardinal x) l') in
let l'' = Util.remove m l' in
loop (m :: l'')
let rec rewrite_declarations force_decls (VAnnotate(_,_,declared,_,what)) =
let m = !Magic.number_of_variables in
let declare_it declared =
if force_decls || cardinal declared >= m then
(empty, declared)
else
(declared, empty)
in match what with
| VADone ->
VAnnotate(empty, empty, empty, 0, what)
| VAInstr i ->
let (u, d) = declare_it declared in
VAnnotate(empty, u, d, 0, what)
| VASeq(a,b) ->
let ma = rewrite_declarations false a
and mb = rewrite_declarations false b
in let VAnnotate (_, ua, _, _, _) = ma
and VAnnotate (_, ub, _, _, _) = mb
in let (u, d) = declare_it (union declared (union ua ub))
in VAnnotate(empty, u, d, 0, VASeq(ma,mb))
let annotate schedule =
let m = !Magic.number_of_variables in
let rec really_analyze live_at_end = function
| VDone ->
VAnnotate(live_at_end, empty, empty, 0, VADone)
| VInstr i ->
let dsts = vsimdinstrToDstoperands i
and srcs = vsimdinstrToSrcoperands i in
let dsts' = List.fold_right add dsts empty
and srcs' = List.fold_right add srcs live_at_end in
VAnnotate(diff srcs' dsts', dsts', empty, 0, VAInstr i)
(*
VAnnotate(remove dst (List.fold_right add srcs live_at_end),
singleton dst,
empty,
0,
VAInstr i))
*)
| VSeq(a,b) ->
let ab = analyze live_at_end b in
let VAnnotate(live_at_begin_b,defined_b,_,depth_a,_) = ab in
let aa = analyze live_at_begin_b a in
let VAnnotate(live_at_begin_a,defined_a,_,depth_b,_) = aa in
let defined = filter vsimdinstroperand_is_temporary
(union defined_a defined_b) in
let declarable = diff defined live_at_end in
let undeclarable = diff defined declarable
and maxdepth = max depth_a depth_b in
VAnnotate(live_at_begin_a, undeclarable, declarable,
cardinal declarable + maxdepth,
VASeq(aa,ab))
| _ -> failwith "really_analyze"
(* the pick_best machinery is currently unused. Too slow, and
I don't know what `best' means *)
and analyze l =
let pick_best w =
optionToValue
(minimize
(function VAnnotate(_,_,_,depth,_) -> depth)
(List.map (really_analyze l) w))
in function
| VSeq (a, VDone) -> analyze l a
| VSeq (VDone, a) -> analyze l a
(* try to balance nested Par blocks *)
| VPar [a] -> pick_best [a]
| VPar l ->
let n2 = (List.length l) / 2 in
let rec loop n a b =
if n = 0 then
(List.rev b, a)
else
match a with
[] -> failwith "loop"
| x :: y -> loop (n - 1) y (x :: b)
in let (a, b) = loop n2 (reorder l) []
in pick_best [VSeq (VPar a, VPar b)]
| x -> pick_best [x]
in rewrite_declarations true (analyze VSimdInstrOperandSet.empty schedule)
let annotatedscheduleToVsimdinstrs =
let rec annotateToInstrs xs (VAnnotate(_,_,_,_,asch)) = aschToInstrs xs asch
and aschToInstrs xs = function
| VADone -> xs
| VAInstr x -> x::xs
| VASeq(s0,s1) -> annotateToInstrs (annotateToInstrs xs s1) s0
in annotateToInstrs []