forked from lkundrak/dev86
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfdiv.x
312 lines (253 loc) · 8.03 KB
/
fdiv.x
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
#define EF_SIZE 4
! bcc 386 floating point routines (version 2) -- Fdiv, Fdivd, Fdivf
! authors: Timothy Murphy ([email protected]), Bruce Evans
#include "fplib.h"
#define FRAME_SIZE (3 * GENREG_SIZE + PC_SIZE)
.extern Fpushf
.extern fpdivzero
.extern fpoverflow
.extern fpunderflow
! double Fdiv(double x, double y) returns x / y
! pop 2 doubles from stack, divide first by second, and push quotient on stack
! we denote upper and lower dwords of x and y (or their fractions)
! by (xu,xl), (yu,yl)
.globl Fdivf
.align ALIGNMENT
Fdivf:
sub esp,#D_SIZE ! make space for dummy double on stack
push ebp
push edi ! save some regs
push esi
mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
call Fpushf
pop esi ! yl
pop edi ! yu
mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
jmp division
.globl Fdiv
.align ALIGNMENT
Fdiv:
push ebp
push edi ! save some regs
push esi
mov eax,FRAME_SIZE+D_LOW[esp] ! xl
mov edx,FRAME_SIZE+D_HIGH[esp] ! xu
mov esi,FRAME_SIZE+D_SIZE+D_LOW[esp] ! yl
mov edi,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! yu
jmp division
.align ALIGNMENT
exp_y_0:
mov ebx,edi
or ebx,esi
beq zerodivide
mov ebx,#1
fix_y:
test edi,edi ! XXX - sloow
js y_unpacked
shld edi,esi,#1
shl esi,#1
dec bx
jmp fix_y
.align ALIGNMENT
exp_x_0:
mov ecx,edx
or ecx,eax
beq retz
mov ecx,#1 ! change exponent from 0 to 1
fix_x:
test edx,#1 << (REG_BIT-1-2) ! XXX - sloow
jnz x_unpacked
shld edx,eax,#1
shl eax,#1
dec cx
jmp fix_x
! Fdivd pops double from stack, divides it by double at [ebx],
! and pushes quotient back on stack
.globl Fdivd
.align ALIGNMENT
Fdivd:
sub esp,#D_SIZE ! make space for dummy double on stack
push ebp
push edi ! save some regs
push esi
mov eax,FRAME_SIZE-PC_SIZE+D_SIZE[esp] ! move return address ...
mov FRAME_SIZE-PC_SIZE[esp],eax ! ... to usual spot
mov eax,FRAME_SIZE+D_SIZE+D_LOW[esp] ! xl
mov edx,FRAME_SIZE+D_SIZE+D_HIGH[esp] ! xu
mov esi,D_LOW[ebx] ! yl
mov edi,D_HIGH[ebx] ! yu
division:
! The full calculations are
! (xu,xl,0) = yu * (zu,zl) + (0,r,0) (normal 96/32 -> 64 bit division)
! yl * zu = yu * q1 + r1 (32*32 -> 64 bit mul and 64/32 -> 32 bit div)
! so
! (xu,xl,0,0) = (yu,yl) * (zu,zl-q1) + (0,0,r-r1,yl*(q1-zl))
! where the calculations zl-q1, r-r1 and yl*(q1-zl) are more complicated
! than the notation suggests. They may be negative and the one with the
! multiplication may not fit in 32 bits and in both cases the overflow
! has to be moved into higher bit positions.
! See Knuth for why (zu,zl-q1) is the correct 64-bit quotient to within
! 1 bit either way (assuming the normalization x < 2 * y).
! We only need to calculate the remainder (0,0,r-r1,yl*(q1-zl)) to resolve
! tie cases. It tells whether the approximate quotient is too high or too
! low.
#define NTEMPS 5
sub esp,#NTEMPS*GENREG_SIZE ! space to remember values for rounding of tie case
! Offsets from esp for these values (offsets using FRAME_SIZE are invalid
! while these temps are active)
r = 0
q1 = 4
r1 = 8
yl = 12
zl = 16
! Step 1: unpack and normalize x to fraction in edx:eax (left shifted as
! far as possible less 2 so that x < y, and later z < y); unpack and normalize
! y to a fraction in edi:esi (left shifted as far as possible), put difference
! of signs (= sign of quotient) in ecx(D_SIGN_MASK) and difference of exponents
! (= exponent of quotient before normalization) in cx.
mov ebp,edx ! xu
xor ebp,edi ! xu ^ yu
and ebp,#D_SIGN_MASK ! sign of result is difference of signs
! Unpack y first to trap 0 / 0
mov ebx,edi ! remember yu for exponent of y
shld edi,esi,#D_BIT-D_FRAC_BIT ! extract fraction of y ...
shl esi,#D_BIT-D_FRAC_BIT
and ebx,#D_EXP_MASK ! exponent of y
jz exp_y_0
shr ebx,#D_EXP_SHIFT ! in ebx (actually in bx, with high bits 0)
or edi,#D_NORM_MASK << (D_BIT-D_FRAC_BIT) ! normalize
y_unpacked:
! Unpack x
mov ecx,edx ! remember xu for exponent of x
shld edx,eax,#D_BIT-D_FRAC_BIT-2 ! extract fraction of x ...
shl eax,#D_BIT-D_FRAC_BIT-2
and edx,#(D_NORM_MASK << (D_BIT-D_FRAC_BIT-2+1))-1
! XXX - above may be shifted 1 extra unnecessarily
and ecx,#D_EXP_MASK ! exponent of x
jz exp_x_0
shr ecx,#D_EXP_SHIFT ! in ecx (actually in cx, with high bits 0)
or edx,#D_NORM_MASK << (D_BIT-D_FRAC_BIT-2) ! normalize
x_unpacked:
sub cx,bx ! not ecx,ebx because we want to use high bit for sign
add cx,#D_EXP_BIAS ! adjust exponent of quotient
or ecx,ebp ! include sign with exponent
! Step 2: quotient of fractions -> (edx,eax)
! 2a: (xu,xl,0) div yu = (zu,zl) -> (ebx,esi)
div eax,edi ! (xu,xl) div yu = zu in eax; remainder (rem) in edx
mov ebx,eax ! save zu in ebx
sub eax,eax ! clear eax: (edx,eax) = (rem,0)
div eax,edi ! (rem,0) div yu = zl in eax
mov r[esp],edx
mov zl[esp],eax
xchg eax,esi ! store zl in esi; save yl in eax
mov yl[esp],eax
! 2b: (yl * zu) div yu -> (0,eax)
mul eax,ebx ! yl * zu -> (edx,eax)
div eax,edi ! (yl * zu) div yu in eax
mov q1[esp],eax
mov r1[esp],edx
! 2c: (xu,xl) / (yu,yl) = (zu,zl) - (yl * zu) div yu -> (edx,eax)
mov edx,ebx ! zu
xchg eax,esi ! eax <- zl; esi <- (yl * zu) div yu
sub eax,esi
sbb edx,#0
! Step 3: normalise quotient
test edx,#1 << (REG_BIT-2) ! is fraction too small? (can only be by 1 bit)
jnz div4
shld edx,eax,#1 ! yes; multiply fraction ...
shl eax,#1 ! ... by 2 ...
dec cx ! ... and decrement exponent
! Step 4: shift and round
div4:
mov ebx,eax ! save for rounding
shrd eax,edx,#D_BIT-D_FRAC_BIT-1 ! shift fraction of result ...
shr edx,#D_BIT-D_FRAC_BIT-1 ! ... to proper position
and ebx,#(1 << (D_BIT-D_FRAC_BIT-1))-1 ! look at bits shifted out
cmp ebx,#D_NORM_MASK >> (D_BIT-D_FRAC_BIT) ! compare with middle value
jb div5 ! below middle, don't round up
ja roundup ! above middle, round up
! The low bits don't contain enough information to resolve the tie case,
! because the quotient itself is only an approximation.
! Calculate the exact remainder.
! This case is not very common, so don't worry much about speed.
! Unfortunately we had to save extra in all cases to prepare for it.
push edx
push eax
sub esi,esi ! the calculation requires 33 bits - carry to here
mov eax,2*GENREG_SIZE+q1[esp]
sub eax,2*GENREG_SIZE+zl[esp]
pushfd
mul dword EF_SIZE+2*GENREG_SIZE+yl[esp]
popfd
jnc foo
sub edx,2*GENREG_SIZE+yl[esp]
sbb esi,#0
foo:
add edx,2*GENREG_SIZE+r[esp]
adc esi,#0
sub edx,2*GENREG_SIZE+r1[esp]
sbb esi,#0
mov ebx,eax
mov edi,edx
pop eax
pop edx
! Can finally decide rounding of tie case
js div5 ! remainder < 0 from looking at top 64 bits
jnz roundup ! remainder > 0 from looking at top 64 bits
or edi,ebx ! test bottom 64 bits
jnz roundup ! remainder > 0
test al,#1 ! at last we know it is the tie case, check parity bit
jz div5 ! already even, otherwise round up to make even
roundup:
add eax,#1 ! add rounding bit
adc edx,#0
test edx,#D_NORM_MASK << 1 ! has fraction overflowed (very unlikely)
jz div5
! Why were the shifts commented out?
shrd eax,edx,#1 ! yes, divide fraction ...
shr edx,#1 ! ... by 2 ...
inc cx ! ... and increment exponent
! Step 5: put it all together
div5:
mov ebx,ecx ! extract sign
and ebx,D_SIGN_MASK
cmp cx,#D_EXP_INFINITE ! is exponent too big?
jge overflow
test cx,cx
jle underflow
shl ecx,#D_EXP_SHIFT
and edx,#D_FRAC_MASK ! remove norm bit
or edx,ecx ! include exponent ...
or edx,ebx ! ... and sign
return:
add esp,#NTEMPS*GENREG_SIZE ! reclaim temp space
mov FRAME_SIZE+D_SIZE+D_LOW[esp],eax ! "push" lower dword of product ...
mov FRAME_SIZE+D_SIZE+D_HIGH[esp],edx ! ... and upper dword
pop esi ! restore registers
pop edi
pop ebp
ret #D_SIZE
retz:
sub edx,edx ! clear upper dword
sub eax,eax ! ... and lower dword
jmp return
overflow:
mov edx,ecx ! put sign in usual reg
call fpoverflow
mov eax,ecx ! XXX - wrong reg
jmp return
underflow:
mov esi,edx ! put upper part of fraction in usual reg
mov edx,ecx ! sign
movsx edi,cx ! put shift in usual reg
neg edi
inc edi
call fpunderflow
jmp return
zerodivide:
mov edx,ebp ! sign
call fpdivzero
mov eax,ecx ! XXX - wrong reg
jmp return