-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathREADME.vector.scm
253 lines (230 loc) · 9.03 KB
/
README.vector.scm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
;;; Some notes on how to add vectors to the Ur-Scheme compiler.
;; And how much complexity it would add.
;;; Vector implementation.
;; A vector consists of the following:
;; - 4 bytes of the vector magic number.
;; - 4 bytes of vector length "N"
;; - 4N bytes of vector data
(define vector-magic something)
(define-global-procedure 'make-vector 1
(lambda () (get-procedure-arg 0)
(ensure-integer)
(asm-and (const "~3") tos)
(add (const "8") tos)
(emit-malloc)
(mov (const vector-magic) (indirect tos))
(mov tos ebx)
(get-procedure-arg 0)
(mov tos (offset ebx 4))
(lea (offset ebx 8) edi)
(mov tos ecx)
(mov (const nil-value) tos)
(rep-stosd)
(pop)))
;; make-vector: +16
; minus the following:
; (define (extract-string)
; (ensure-string)
; (lea (offset tos 8) ebx) ; string pointer
; (asm-push ebx)
; (mov (offset tos 4) tos)) ; string length
(define (extract-string) (ensure-string) (extract-array))
(define (extract-array)
(asm-push (offset tos 8))
(mov (offset tos 4) tos))
(define (extract-vector) (ensure-vector) (extract-array))
(define ensure-vector
(call-ensure-magic-routine vector-magic "ensure_vector" "not a vector"))
(define (call-ensure-magic-routine magic routine-label errmsg)
(let ((errlabel (new-label)))
(define-error-routine errlabel errmsg)
(add-to-header (lambda ()
(label routine-label)
(if-not-right-magic-jump magic errlabel)
(ret))))
(lambda () (call routine-label)))
;; and then we can eliminate this:
; (define-error-routine "notstring" "not a string")
; (add-to-header (lambda ()
; (label "ensure_string")
; (if-not-right-magic-jump string-magic "notstring")
; (ret)))
; ;; Emit code to ensure that %eax is a string
; (define (ensure-string) (call "ensure_string"))
;; in favor of this:
(define ensure-string
(call-ensure-magic-routine string-magic "ensure_string" "not a string"))
;; ensure-vector total: +12 lines, -6 lines
;; But also we can save 4 lines each on ensure-procedure, ensure-cons,
;; ensure-symbol, and ensure-heap-var, so -16 lines more.
;; Also we would like this
(define-magic-check-primitive 'vector? vector-magic)
;; implemented as follows
(define (define-magic-check-primitive name magic)
(define-global-procedure name 1
(lambda ()
(get-procedure-arg 0)
(if-not-right-magic-jump magic "return_false")
(jmp "return_true"))))
;; which allows us to replace this:
; (define-global-procedure 'string? 1
; (lambda ()
; (get-procedure-arg 0)
; (if-not-right-magic-jump string-magic "return_false")
; (jmp "return_true")))
;; with this:
(define-magic-check-primitive 'string? string-magic)
;; vector? total: +8 -5
;; But this also simplifies procedure?, pair?, and symbol? by 4 lines
;; each :)
;; So then we just need vector-ref and vector-set! primitives, and
;; support in wthunk.
;; string-ref looks like this:
; (define-global-procedure 'string-ref 2
; (lambda ()
; (comment "string-ref primitive procedure")
; (get-procedure-arg 0)
; (extract-string) ; string only
; (get-procedure-arg 1)
; (check-array-bounds)
; (get-procedure-arg 1)
; (scheme-to-native-integer tos)
; (comment "get base address of string data from stack")
; (asm-pop ebx)
; (movzbl (indirect (index-register tos ebx 1)) tos) ; string only
; (native-to-scheme-character tos))) ; string only
;; So you could write:
;; array-ref-with-arg: consumes result of extract-array. leaves base
;; address of array in specified register and index in tos
(define (array-ref-with-arg argnum basereg)
(get-procedure-arg argnum)
(check-array-bounds)
(get-procedure-arg argnum)
(scheme-to-native-integer tos)
(comment "get base address of array data from stack")
(asm-pop basereg))
(define-global-procedure 'string-ref 2
(lambda ()
(comment "string-ref primitive procedure")
(get-procedure-arg 0)
(extract-string)
(array-ref-with-arg 1 ebx)
(movzbl (indirect (index-register tos ebx 1)) tos)
(native-to-scheme-character tos)))
(define-global-procedure 'vector-ref 2
(lambda ()
(comment "vector-ref primitive procedure")
(get-procedure-arg 0)
(extract-vector)
(array-ref-with-arg 1 ebx)
(mov (indirect (index-register ebx tos 4)) tos)))
;; vector-ref total: +22, -13 lines
;; string-set! looks like this:
; (define-global-procedure 'string-set! 3
; (lambda ()
; (comment "string-set! primitive procedure")
; (get-procedure-arg 0)
; (extract-string)
; (get-procedure-arg 1)
; (check-array-bounds)
; (get-procedure-arg 1)
; (scheme-to-native-integer tos)
; (mov tos edi)
; (comment "now retrieve the address of string bytes from the stack")
; (pop)
; (mov tos ebx)
; (get-procedure-arg 2)
; (ensure-character)
; (scheme-to-native-character tos)
; (movb al (indirect (index-register ebx edi 1)))
; (comment "discard the character and base address")
; (pop) (pop)
; (comment "but we need a return value...")
; (get-procedure-arg 0)))
;; But it could be written with the above array-ref-with-arg:
(define-global-procedure 'string-set! 3
(lambda ()
(comment "string-set! primitive procedure")
(get-procedure-arg 0)
(extract-string) ; string only
(array-ref-with-arg 1 ebx)
(comment "save index in index register")
(mov tos edi)
(get-procedure-arg 2)
(ensure-character) ; string only
(scheme-to-native-character tos) ; string only
(movb al (indirect (index-register ebx edi 1))) ; string only
(comment "discard the character and index")
(pop) (pop)
(comment "but we need a return value...")
(get-procedure-arg 0)))
;; which saves another 5 lines or so. But vector-set! is going to
;; look awfully similar. So: vector-set!: -5, +16.
;; And then we need
(define-global-procedure 'vector-length 1
(lambda () (extract-vector)
(asm-pop ebx)
(native-to-scheme-integer tos)))
;; wthunk support looks like
...
((vector? x) (display "#") (wthunk (vector->list x) display))
...
(define (vector->list vec) (vector->list-2 vec 0))
(define (vector->list-2 vec idx)
(if (= idx (vector-length vec)) '()
(cons (vector-ref vec idx) (vector->list-2 vec (1+ idx)))))
;; Complexity costs:
;; vector-set: -5, +16
;; vector-ref: +22, -13 lines
;; vector?: +8 -5 -12
;; ensure-vector: +12 -6 -16
;; extract-vector: +5 -5
;; make-vector: +16
;; vector-length: +4
;; wthunk support..+5
;; total: +88 -62 lines, or net +26 lines.
;;; Crazy idea
;; How much would the compiler really suffer from storing strings as
;; normal vectors of characters? It would pretty much have to buffer for
;; writes, but then we could write string-set! and vector-set! as follows:
(define (string-set! string index char)
(cond ((not (char? char)) (error "trying to put a non-char into a a string"
(list string index char)))
((not (string? string)) (error "trying to string-set! on a non-string"
(list string index char)))
(else (array-set! string index char))))
(define (vector-set! vec index val)
(if (not (vector? vec)) (error "trying to vector-set! on a non-vector"
(list vec index val))
(array-set! vec index val)))
;; And similarly for string-ref and vector-ref.
;; It would make the executable more obscure (you wouldn't be able to
;; see the strings in it very easily), and quadruple the size of its
;; strings. Its actual legitimate string size is 7300 characters or
;; so at the moment, excluding really short strings, although
;; eliminating duplicates would remove about 12% of that. So that
;; would be an extra 20K or so, which I think would be ugly.
;;; Benefits.
;; The main benefit I had in mind was that you could build hash
;; tables. So how much extra complexity are hash tables? Probably
;; the thing to do is to compute a hash value for each symbol which is
;; stored in the symbol's data structure in memory, and then provide a
;; nonstandard accessor for it. Then you'd say something like
(define hash-size 8)
(define (make-dict)
(let ((rv (make-vector hash-size))) (init-dict rv 0) rv))
(define (init-dict vec i)
(if (< i (vector-length vec)) (begin (vector-set! vec i '())
(init-dict vec (1+ i)))))
(define (hash-bucket sym) (remainder (symbol-hash sym) hash-size))
(define (dict-bucket sym dict) (vector-ref dict (hash-bucket sym)))
(define (dict-ref sym dict) (assq sym (dict-bucket sym dict)))
(define (dict-add! rec dict)
(let ((sym (car rec)))
(let ((old-bucket (dict-bucket sym dict)))
(vector-set! dict (hash-bucket sym) (cons rec old-bucket)))))
;; and those 13 lines of code (or something like them) would allow
;; you to make these substitutions where you need a speedup:
;; (make-dict) for '()
;; (dict-ref ...) for (assq ...)
;; (dict-add! ... foo) for (set! foo (cons ... foo))