-
Notifications
You must be signed in to change notification settings - Fork 2.6k
/
Copy pathreader.bas
198 lines (174 loc) · 6.83 KB
/
reader.bas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
REM > reader library for mal in BBC BASIC
REM ** Reader **
REM The Reader object is implemented as an array and a mutable pointer.
DEF FNreader_peek(tokens$(), RETURN tokptr%)
=tokens$(tokptr%)
DEF FNreader_next(token$(), RETURN tokptr%)
tokptr% += 1
=tokens$(tokptr% - 1)
DEF FNread_str(src%)
LOCAL ntokens%, tokptr%, tokens$()
DIM tokens$(2048)
ntokens% = FNtokenize(src%, tokens$())
tokptr% = 0
=FNread_form(tokens$(), tokptr%)
REM ** Tokenizer **
DEF FNtokenize(src%, tokens$())
REM The tokenizer is implemented explicitly as a deterministic
REM finite automaton.
LOCAL p%, state%, tok$, tokptr%, c$, rc$, match$, action%
LOCAL DATA
state% = 1
tokptr% = 0
tok$ = ""
FOR p% = 1 TO FNstring_len(src%)
c$ = FNstring_chr(src%, p%)
rc$ = c$
REM Convert some characters to ones that are easier to put into
REM DATA statements. These substitutions are only used for
REM matching: the token still contains the original character.
CASE ASC(c$) OF
REM Fold some upper-case letters together so that we can re-use
REM them to represent more awkward characters.
WHEN 78, 81: c$ = "A"
REM Now convert newlines into "N"
WHEN 10: c$ = "N"
REM These are the other characters that Perl's "\s" escape matches.
WHEN 9, 11, 12, 13: c$ = " "
REM Brandy has a bug whereby it doesn't correctly parse strings
REM in DATA statements that begin with quotation marks, so convert
REM quotation marks to "Q".
WHEN 34: c$ = "Q"
ENDCASE
REM The state table consists of a DATA statement for each current
REM state, which triples representing transitions. Each triple
REM consists of a string of characters to match, an action, and a
REM next state. A matching string of "" matches any character,
REM and hence marks the end of a state.
REM Actions are:
REM 0: Add this character to the current token
REM 1: Emit token; start a new token with this character
REM 5: Emit token; skip this character
RESTORE +state%
REM state 1: Initial state, or inside a bare word
DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",0,1
REM state 3: Just seen the end of a token
DATA " N,",5,1, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
REM state 5: Just seen a "~"
DATA " N,",5,1, "@",0,3, "~",1,5, "[]{}()'`^@",1,3, Q,1,7, ";",5,11, "",1,1
REM state 7: Inside a quoted string
DATA "\",0,9, Q,0,3, "",0,7
REM state 9: After a backslash in a string
DATA "",0,7
REM state 11: Inside a comment
DATA N,5,3, "",5,11
REM Find a matching transition from the current state.
REM PRINT ;state%;"-->";
REPEAT
READ match$, action%, state%
REM PRINT "[";match$;"](";action%;",";state%;")";
UNTIL match$ = "" OR INSTR(match$, c$) > 0
REM PRINT ;"-->";state%
REM Execute any actions.
IF action% AND 1 AND tokens$(tokptr%) <> "" THEN tokptr% += 1
IF (action% AND 4) = 0 THEN tokens$(tokptr%) += rc$
NEXT p%
IF tokens$(tokptr%) <> "" THEN tokptr% += 1
=tokptr%
REM ** More Reader **
DEF FNread_form(tokens$(), RETURN tokptr%)
LOCAL tok$, x%
tok$ = FNreader_peek(tokens$(), tokptr%)
CASE tok$ OF
WHEN "" : ERROR &40E80930, "Unexpected end of input"
WHEN "(": =FNread_list(tokens$(), tokptr%)
WHEN "[": =FNread_vector(tokens$(), tokptr%)
WHEN "{": =FNread_hashmap(tokens$(), tokptr%)
WHEN ")", "]", "}": ERROR &40E80931, "Unexpected '"+tok$ +"'"
WHEN "'": =FNreader_macro("quote", tokens$(), tokptr%)
WHEN "`": =FNreader_macro("quasiquote", tokens$(), tokptr%)
WHEN "~": =FNreader_macro("unquote", tokens$(), tokptr%)
WHEN "~@":=FNreader_macro("splice-unquote", tokens$(), tokptr%)
WHEN "@": =FNreader_macro("deref", tokens$(), tokptr%)
WHEN "^": =FNread_with_meta(tokens$(), tokptr%)
ENDCASE
=FNread_atom(tokens$(), tokptr%)
DEF FNread_list(tokens$(), RETURN tokptr%)
LOCAL tok$
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "("
=FNread_list_tail(tokens$(), tokptr%, ")")
DEF FNread_vector(tokens$(), RETURN tokptr%)
LOCAL tok$
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "["
=FNas_vector(FNread_list_tail(tokens$(), tokptr%, "]"))
DEF FNread_list_tail(tokens$(), RETURN tokptr%, term$)
LOCAL tok$, car%, cdr%
IF FNreader_peek(tokens$(), tokptr%) = term$ THEN
tok$ = FNreader_next(tokens$(), tokptr%)
=FNempty
ENDIF
car% = FNread_form(tokens$(), tokptr%)
cdr% = FNread_list_tail(tokens$(), tokptr%, term$)
=FNalloc_pair(car%, cdr%)
DEF FNread_hashmap(tokens$(), RETURN tokptr%)
LOCAL tok$, map%, key%, val%
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "{"
map% = FNempty_hashmap
WHILE FNreader_peek(tokens$(), tokptr%) <> "}"
key% = FNread_form(tokens$(), tokptr%)
IF NOT FNis_string(key%) ERROR &40E80932, "Hash-map key must be a string"
val% = FNread_form(tokens$(), tokptr%)
map% = FNhashmap_set(map%, FNunbox_string(key%), val%)
ENDWHILE
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip over "}"
=map%
DEF FNreader_macro(quote$, token$(), RETURN tokptr%)
LOCAL tok$
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip quoting token
=FNalloc_list2(FNalloc_symbol(quote$), FNread_form(tokens$(), tokptr%))
DEF FNread_with_meta(token$(), RETURN tokptr%)
LOCAL tok$, wm%, base%, meta%
tok$ = FNreader_next(tokens$(), tokptr%) : REM skip '^' token
wm% = FNalloc_symbol("with-meta")
meta% = FNread_form(tokens$(), tokptr%)
base% = FNread_form(tokens$(), tokptr%)
=FNalloc_list3(wm%, base%, meta%)
DEF FNis_token_numeric(tok$)
LOCAL i%, c%
IF LEFT$(tok$, 1) = "-" THEN tok$ = MID$(tok$, 2)
IF LEN(tok$) = 0 THEN =FALSE
FOR i% = 1 TO LEN(tok$)
c% = ASC(MID$(tok$, i%, 1))
IF c% < &30 OR c% > &39 THEN =FALSE
NEXT i%
=TRUE
DEF FNread_atom(tokens$(), RETURN tokptr%)
LOCAL strval$
strval$ = FNreader_next(tokens$(), tokptr%)
IF strval$ = "nil" THEN =FNnil
IF strval$ = "true" THEN =FNalloc_boolean(TRUE)
IF strval$ = "false" THEN =FNalloc_boolean(FALSE)
IF LEFT$(strval$, 1) = """" THEN =FNalloc_string(FNunquote_string(strval$))
IF LEFT$(strval$, 1) = ":" THEN =FNalloc_string(CHR$(127) + MID$(strval$, 2))
IF FNis_token_numeric(strval$) THEN =FNalloc_int(VAL(strval$))
=FNalloc_symbol(strval$)
DEF FNunquote_string(strval$)
LOCAL inptr%, bs%, out$, c$
IF RIGHT$(strval$, 1) <> """" THEN ERROR &40E80930, "Unexpected end of input"
inptr% = 2
REPEAT
bs% = INSTR(strval$, "\", inptr%)
IF bs% > 0 THEN
out$ += MID$(strval$, inptr%, bs% - inptr%)
c$ = MID$(strval$, bs% + 1, 1)
IF c$ = "n" THEN c$ = CHR$(10)
out$ += c$
inptr% = bs% + 2
ENDIF
UNTIL bs% = 0
IF inptr% = LEN(strval$) + 1 THEN ERROR &40E80930, "Unexpected end of input"
out$ += MID$(strval$, inptr%, LEN(strval$) - inptr%)
=out$
REM Local Variables:
REM indent-tabs-mode: nil
REM End: