-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuuscan.h
502 lines (420 loc) · 17.3 KB
/
uuscan.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
// uuscan.h - light-weight helper functions for recursive descent parsing
// uses _Generic selector, requires C11 or later
// github.com/spinau/uuscan
/*{{{ uuscan.h exports; names beginning with underscore not meant for app use
Macros
accept(t) return true if t scan succeeds
accept(t, &val) return true if t scan succeeds, result in val
acceptall(t1, t2, ...) return true if all terms succeed
expect(t) "expected" uuerror if t fails
expect(t, &val) if t succeeds, result in val
expect(t, &val, char *msg) if t fails, uuerror reports msg string
uuerror(char *fmt, ...) jump out of parse with an error msg
on_uuerror following statement or block is uuerror target
fail(char *lp) fail out of scan with lp at fail point
success(char *lp) return successful scan, update uu.lp with lp
UUTERMINALS X(t1) X(t2) ... declare terminals
UUDEFINE(t) define scan function to terminal t
UUDEFINE(t, <type> *v) with return value ptr
CHAR(x) same as (char)x for use in accept/expect
Functions
uudebugf(char *fmt, ...) stderr messages if compiled with UUDEBUG and env variable UUDEBUG is set
char *skipspace(char *) advances over front space
Struct
uu uuscan internals; app must set uu.line and uu.lp
}}}*/
/*{{{ notes
To set up for uu scanning:
#define UUTERMINALS X(T1) X(T2) ... // app-specific scan terminals
#include "uuscan.h"
To initialise input for uuscanning set the following two pointers:
uu.lp = uu.line = <input string>
If uuerror() is used then define the error longjmp target with:
on_uuerror {
... // e.g. puts(uu.errmsg);
}
The two main functions provided for scanning elements are:
accept(x)
Scans for terminal x: returns true if x is correctly scanned, false if the
next element is not x.
expect(x)
Scans for terminal x: if the next input is not x then a scanning error is
raised in the form "expected x at pos N". If a more application-specific error
message is desired then use a third argument to replace the default "expected x" text.
expect(integer, &i, "address or unit number");
will produce "address or unit number at pos N" on failure to scan integer.
x is a string literal, char *, char, or char literal, or an application-defined
terminal name (like integer in the above example). Literal matching is provided
here for char * and char. The application must provide scanning functions for
app-defined terminals in the same file as uuscan.h is included.
The mechanism to return converted values from a scan back to the caller
uses either an appropriately typed address-of as the second argument, or,
assignment to a UUVAL (union or struct) element if the second argument is omitted.
Second argument to accept and expect:
int i;
accept(integer, &i) // pass address-of variable
float *f;
expect(floatingpoint, &f) // pass address-of pointer; dereference in scanner
Alternatively (or in addition), define a UUVAL union or struct:
#define UUVAL struct { int i; } // must be defined before including uuscan.h
Using UUVAL:
accept(integer); // the scanner for 'integer' will assign uu.i
if (uu.i > 0)
...
A successful scan would typically assign the result to either the dereferenced
address-of argument (if present) or the uu element (if defined) then return
success(lp) to update the line pointer uu.lp.
To define app-specific terminals, define UUTERMINALS before including this header,
as shown above. At least one terminal must be defined.
An associated scanning function for each T must also be defined. A convenience
macro UUDEFINE(T) supplies the standard function header that names the scanning
function and provides the necessary arguments.
UUDEFINE(T) // set up fn header for scan terminal T
or
UUDEFINE(T, <type> *res) // optional result return ptr
{
// initial whitespace has been skipped unless matching on space specifically
// char *lp is predefined as local ptr to next input
...
return fail(lp); // return false and set uu.lpfail ptr
...
*res = <result of scan>
return success(lp); // return true and update uu.lp to next char of input
}
"terminal" is loosely defined. Scanning for a terminal usually means scanning a
single lexical element, but there is nothing preventing a scanner from processing
more complex forms.
Syntax and conversion error handling is done with uuerror() with normal printf()
style formatting. uuerror() formats the message string and does a longjmp to the
on_uuerror { ... } block where the error message can be printed or dealt with.
uuerror() allows errors to be raised even in deeply nested or recursed parsing without
having to unwind the calls programmatically. The on_uuerror { ... } block can print
an error message and either exit(1) or drop through to collect the next input line.
Because uuscan.h sets up all terminals statically at compile-time this method
is best suited to a single-source file for a particular parsing job, at least
the part requiring the accept/expect's. This file separation also allows multiple
parsers, each with a different terminal set, to coexist within one executable.
If compiled with -DUUDEBUG then uudebugf() output is activated when environment
variable UUDEBUG is defined.
Sep22-SP simplified from a previous version
Dec23-SP 2nd arg method of value returns; uu.val retired
}}}*/
//{{{ includes & clang silencers
#ifndef _STDIO_H
#include <stdio.h>
#endif
#ifndef _STDBOOL_H
#include <stdbool.h>
#endif
#ifndef _SETJMP_H
#include <setjmp.h>
#endif
#ifndef _CTYPE_H
#include <ctype.h>
#endif
#ifndef _STRING_H
#include <string.h>
#endif
#ifndef _STDARG_H
#include <stdarg.h>
#endif
#ifndef _STDLIB_H
#include <stdlib.h>
#endif
// Silence clang diagnostics that the macro techniques in this header provoke.
// NOTE(review): the pragmas are not wrapped in push/pop, so they presumably
// remain in effect for the rest of the including source file - confirm that
// this is intended.
// uuerror() always appends an extra "" argument to its sprintf call:
#pragma clang diagnostic ignored "-Wformat-extra-args"
#pragma clang diagnostic ignored "-Wparentheses"
// scanner functions are declared and stored with empty parameter lists:
#pragma clang diagnostic ignored "-Wdeprecated-non-prototype"
#pragma clang diagnostic ignored "-Wmain-return-type"
// UUDEFINE(t)/UUDEFINE(t,&v) optional 2nd arg triggers warning:
#pragma clang diagnostic ignored "-Wc2x-extensions"
//}}}
// *** declare terminals in application prior to including uuscan.h: ***
// The enum, forward declarations and dispatch table below are all generated
// from UUTERMINALS, so compilation stops here if it is missing.
#ifndef UUTERMINALS
#error define UUTERMINALS with 1 or more terminal names using X(..)
#endif
// Force inlining of the small scan helpers: the keyword itself is remapped so
// that every later use of "inline" in the including file also carries the
// always-inline attribute.
// The attribute is spelled out directly instead of going through the
// __always_inline macro, which is a private convenience macro of some C
// libraries and is not available everywhere. On compilers without GNU
// attribute syntax "inline" is left as the plain keyword.
#ifndef inline
# if defined(__GNUC__) || defined(__clang__)
#  define inline __inline__ __attribute__((__always_inline__))
# endif
#endif
//{{{ VA_COUNT macro
// VA_COUNT(...) expands to the number of arguments it was given.
// this is a hack to count number of arguments in a variadic macro
// works for up to 10 args (last number in _argc_n - 1)
// VA_COUNT must be able to detect zero arguments
//
// How it works: the caller's arguments are placed in front of the descending
// list _ARGSEQ, and _ARGC_N expands to whatever lands in its 12th position (N).
// Each extra argument pushes the list one place to the right, so N ends up
// being the argument count. The leading dummy `_` together with the
// `, ##__VA_ARGS__` comma-swallowing form (a GNU preprocessor extension)
// is what lets an empty argument list produce 0.
// Nothing is defined here if the application already provides VA_COUNT.
#ifndef VA_COUNT
#define _ARGC_N( _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, N, ...) N
#define _ARGSEQ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
#define _ARGC(...) _ARGC_N(__VA_ARGS__)
// count the number of arguments:
#define VA_COUNT(...) _ARGC(_, ##__VA_ARGS__, _ARGSEQ)
#endif
//}}}
//{{{ UUDEBUG
// uudebugf(fmt, ...) - printf-style trace written to stderr. Each message is
// prefixed with the function name and line number recorded in uu.fn and
// uu.linenum, and followed by the not-yet-scanned input (from uu.lp to the end
// of the string) with non-printable bytes shown as \ooo octal escapes.
// Compiled in only when UUDEBUG is defined; at run time it stays silent unless
// the environment variable UUDEBUG is set. Otherwise it expands to nothing.
// Bytes are converted to unsigned char before isprint() and %o: the <ctype.h>
// functions are undefined for negative values, and a negative char would
// otherwise be printed as a sign-extended octal number.
#ifdef UUDEBUG
#define uudebugf(...) do{ \
    if (getenv("UUDEBUG") == NULL) break; \
    fprintf(stderr, "uuscan: %s %d: ", uu.fn, uu.linenum); \
    fprintf(stderr, __VA_ARGS__); \
    fprintf(stderr, " lp=["); \
    for (char *cp = uu.lp; *cp; ++cp) \
        if (isprint((unsigned char)*cp)) \
            fputc(*cp, stderr); \
        else \
            fprintf(stderr, "\\%03o", (unsigned)(unsigned char)*cp); \
    fputc(']', stderr); \
    fputc('\n', stderr); }while(0)
#else
#define uudebugf(...) /**/
#endif
//}}}
// Scanner state used by every accept()/expect() in the including source file.
// The object is static, so each translation unit that includes this header
// gets its own private instance named uu.
// The application must point uu.line and uu.lp at the input before scanning.
static struct uuscan {
char *line; // ptr to current line being scanned
char *lp; // advancing ptr into line updated after scan by accept(),expect()
char *lpstart; // start of current input scan
char *lpfail; // scan failed ptr into line
int len; // length of successfully scanned element
char ch; // saves last char literal scanned
char *msg; // ptr to message for on_uuerror target; usually local _uumsgbuf
char *failmsg; // additional fail message:
// appended to expect() fail uuerror message
// could also be used after failed accept() by caller
void (*callback)(); // if non NULL, callback is called before uuerror() jump is made
// allows for clean-up code prior to uuerror message
// uuerror() will reset to NULL
jmp_buf errjmp; // uuerror() jump target: on_uuerror
#ifdef UUDEBUG
const char *fn; // function name of the accept() call being traced
int linenum; // source line of the accept() call being traced
#endif
#ifdef UUVAL
UUVAL; // converted terminal value temporaries, examples:
// #define UUVAL struct { int i; char *str; }
// #define UUVAL union { int i; char *str; }
#endif
} uu;
// default error message buffer; on_uuerror points uu.msg at it
static char _uumsgbuf[80];
// UUDEFINE(t) / UUDEFINE(t, <type> *res) - expands to the function header of
// the scanner for terminal t, named _scan_t, returning bool.
// The first parameter is always `char *lp` (the scan position). The second is
// either the caller-supplied result-pointer declaration or, in the one-argument
// form, an unnamed `void *` (the unnamed parameter is what the
// -Wc2x-extensions silencer above is for).
// Dispatch is on the argument count: _uudefine<N> is selected via VA_COUNT.
// UUDEFINE() with no arguments expands to a lone `;`.
#define UUDEFINE(...) _uudefine(VA_COUNT(__VA_ARGS__), __VA_ARGS__)
#define _uudefine(n,...) CONCAT(_uudefine,n)(__VA_ARGS__)
#define _uudefine0(...) ;
#define _uudefine1(x) static bool _scan_##x(char *lp, void *)
#define _uudefine2(x,res) static bool _scan_##x(char *lp, res)
// The UUTERMINALS list is expanded three times with different definitions of
// X(t) to generate, in turn: the terminal ids, the scanner forward
// declarations, and the id -> scanner dispatch table.
//
// autobuild terminal enum constants:
// each terminal name becomes an enum constant numbered by __COUNTER__
#define X(t) t=__COUNTER__,
static enum { UUTERMINALS } terms;
#undef X
// enum must be used to save current __COUNTER__ value
// (every mention of __COUNTER__ yields a new value, so it is captured once)
enum { UUTERMCOUNT = __COUNTER__ };
// autobuild forward decl of _scan_T() functions:
// (declared with an empty parameter list because the type of the result
// pointer differs from scanner to scanner)
#define X(t) static bool _scan_##t();
UUTERMINALS
#undef X
// autobuild list of ptrs to scanning functions:
// indexed by terminal id; name is the terminal's spelling, used in
// "expected ..." messages and debug traces
static struct uuterm {
bool (*fn)();
char *name;
} uuterms[UUTERMCOUNT] = {
#define X(t) [t]={_scan_##t, #t},
UUTERMINALS
};
#undef X
// accept() does nothing
// accept(t) call scanner t depending on type selection
// accept(t, &res) call scanner t with appropriate ptr to save successful result
//
// The scanner is chosen at compile time from the static type of t:
//   char * / const char *  -> __scan_literal (match the text)
//   char                   -> __scan_char    (match one character)
//   int                    -> __scan_term    (t is an app-defined terminal id)
//   anything else          -> __unknown3     (placeholder, not meant to be used)
// The chosen function is called with (t, uu.lp, res); res is NULL in the
// one-argument form.
// In the debug variant the call site (__FUNCTION__, __LINE__) is recorded in
// uu.fn / uu.linenum first so that uudebugf() can report it.
// NOTE(review): the debug variant is selected with `#if UUDEBUG` while the
// uudebugf macro and the uu.fn/uu.linenum members use `#ifdef UUDEBUG`, so
// UUDEBUG needs to be defined to a non-zero value (as -DUUDEBUG does).
#define accept(...) _accept(VA_COUNT(__VA_ARGS__), __VA_ARGS__)
#define _accept(n,...) CONCAT(_accept,n)(__VA_ARGS__)
#define _accept0(...) ;
#define _accept1(x) __accept(x, NULL)
#define _accept2(x,res) __accept(x, res)
#if UUDEBUG
#define __accept(x,res) \
(uu.fn=__FUNCTION__, uu.linenum=__LINE__, _Generic(x, \
const char*: __scan_literal, \
char*: __scan_literal, \
char: __scan_char, \
int: __scan_term, \
default: __unknown3) (x, uu.lp, res))
#else
#define __accept(x,res) \
_Generic(x, \
const char*: __scan_literal, \
char*: __scan_literal, \
char: __scan_char, \
int: __scan_term, \
default: __unknown3) (x, uu.lp, res)
#endif
// token-paste helper used by the argument-count dispatch macros in this file
// (the callers pass an already-expanded count as b)
#define CONCAT(a,b) a ## b
// macros for supporting acceptall(...) with up to 5 terminals
// _ACCEPT<n> accepts its first argument and chains to _ACCEPT<n-1> for the
// remaining n arguments with &&, so scanning stops at the first failure
#define _ACCEPT4(t,...) __accept(t,NULL) && _ACCEPT3(__VA_ARGS__)
#define _ACCEPT3(t,...) __accept(t,NULL) && _ACCEPT2(__VA_ARGS__)
#define _ACCEPT2(t,...) __accept(t,NULL) && _ACCEPT1(__VA_ARGS__)
#define _ACCEPT1(t,...) __accept(t,NULL) && _ACCEPT0(__VA_ARGS__)
#define _ACCEPT0(t,...) __accept(t,NULL)
// n is the number of terminals after the first one
#define _ACCEPTALL(n,t,...) CONCAT(_ACCEPT,n)(t, __VA_ARGS__)
// acceptall will call accept() on each argument until failure or all accepted
// acceptall scans only, does not save scan result (result 2nd arg is null)
// if any term fails then uu.lp is unchanged
// Implemented as a ({ ... }) statement expression (a GNU C extension) whose
// value is true only when every terminal was accepted; uu.lp is saved on entry
// and restored on failure.
#define acceptall(t,...) \
({ char *savelp = uu.lp; bool r=false; \
if (_ACCEPTALL(VA_COUNT(__VA_ARGS__), t, __VA_ARGS__)) r=true; \
else uu.lp = savelp; \
r; })
// 1-based position of the last scan failure (uu.lpfail) within uu.line;
// this is the number reported as "at pos N" in expect() messages
#define uuerrorpos() (int)((uu.lpfail - uu.line) + 1)
// fail(cp) will return false from scanner with cp pointing to fail position
//
// fail(cp,msg) same as fail(cp), also sets uu.failmsg=msg
// the optional msg is to return to caller a lexical error that has
// been detected but not reported. the caller deals with the error
// either by ignoring, reporting, or recovering as appropriate.
//
// [in general, a fail from a scanner indicates the lexical element
// was not recognised. however, in some cases a lexical error should be caught
// (e.g., an almost well-formed float format) and this is a way of returning
// an error indication without having to resort to uuerror() immediately]
//
// Both forms are comma expressions whose value is false, intended to be used
// as `return fail(...);`. The one-argument form clears uu.failmsg.
// The variant is picked by argument count through VA_COUNT.
#define _fail_1(x) (uu.lpfail=(x), uu.failmsg=NULL, false) // VA_COUNT 1
#define _fail_2(x,y) (uu.lpfail=(x), uu.failmsg=(y), false) // VA_COUNT 2
#define _fail(n,...) CONCAT(_fail_, n)
#define fail(...) _fail(VA_COUNT(__VA_ARGS__))(__VA_ARGS__)
// success(cp) advances uu.lp to cp and has the value true,
// intended to be used as `return success(cp);`
#define success(x) (uu.lp=(x), true)
// expect(t)            scan for t; on failure build the default message and jump
// expect(t, &res)      same, storing the scanned value through res on success
// expect(t, &res, msg) same, but msg replaces the default "expected ..." text
//
// On failure the message is written to uu.msg by the type-specific _msg_*
// helper (selected with _Generic exactly like accept) and control is
// transferred with longjmp to the on_uuerror target.
// expect is a do{}while(0) statement, not an expression: it has no value.
// Note that x appears twice in the expansion (once for the scan and once for
// the message), so on failure an x with side effects is evaluated twice.
// NOTE(review): unlike uuerror(), this path does not invoke uu.callback before
// jumping - confirm whether that is intended.
#define expect(...) _expect(VA_COUNT(__VA_ARGS__), __VA_ARGS__)
#define _expect(n,...) CONCAT(_expect,n)(__VA_ARGS__)
#define _expect1(x) __expect(x, NULL, NULL)
#define _expect2(x,res) __expect(x, res, NULL)
#define _expect3(x,res,msg) __expect(x, res, msg)
#define __expect(x,res,msg) do { \
if (accept(x,res)==false) { \
_expect_msg(x,msg); \
longjmp(uu.errjmp,1); } \
}while(0)
#define _expect_msg(x, msg) _Generic(x, \
const char*: _msg_str, \
char*: _msg_str, \
char: _msg_char, \
int: _msg_term, \
default: __unknown2)(x, msg)
// on_uuerror { ... } - marks the following statement or block as the error
// handler. It points uu.msg at the default buffer and records the jump target;
// the block runs only when setjmp returns non-zero, i.e. after an
// uuerror()/expect() longjmp.
// The expansion is two statements (an assignment followed by an `if`), so it
// must not be used as the unbraced body of another control statement.
#define on_uuerror uu.msg = _uumsgbuf; if (setjmp(uu.errjmp))
// uuerror(fmt, ...) - format a message into uu.msg, run and clear the optional
// uu.callback clean-up hook, then longjmp to the on_uuerror target.
// A trailing "" is always appended to the argument list: it is ignored as an
// extra argument when a format is given, and serves as the (empty) format when
// uuerror() is called with no arguments.
// NOTE(review): the sprintf is unbounded, while the default buffer
// (_uumsgbuf) holds 80 bytes; long messages will overrun it.
#define uuerror(...) do{ \
sprintf(uu.msg,## __VA_ARGS__, ""); \
if (uu.callback) { uu.callback(); uu.callback=NULL; } \
longjmp(uu.errjmp,1); \
} while(0)
// Return a pointer to the first character at or after cp that isspace() does
// not classify as whitespace. The terminating '\0' is not whitespace, so the
// scan never runs past the end of the string.
// The byte is converted to unsigned char before the test because the
// <ctype.h> functions are undefined for negative arguments other than EOF,
// which is what a plain char >= 0x80 becomes where char is signed.
static inline char *
skipspace(char *cp)
{
    while (isspace((unsigned char)*cp))
        ++cp;
    return cp;
}
// scan for a single char
//   wanted  the character to match
//   lp      current scan position
//   res     optional (may be NULL) pointer to a char that receives the match
// Returns success(lp) with uu.lp advanced past the match, or fail(lp) with
// uu.lpfail set to the first non-space character.
//
// Two cases:
//  - wanted is whitespace: it matches any single whitespace character at lp
//    (no skipping is done); the value stored through res is wanted itself.
//    uu.ch is not updated on this path.
//  - otherwise leading whitespace is skipped and the next character must equal
//    wanted; uu.ch records it. wanted == '\0' matches the end of the input
//    without advancing past it.
// Characters are converted to unsigned char before being handed to the
// <ctype.h> classifiers (undefined for negative values) and to %o.
static inline bool
__scan_char(char wanted, char *lp, void *res)
{
    const unsigned char want = (unsigned char)wanted;
#if UUDEBUG
    if (isprint(want))
        uudebugf("scan_char '%c'", wanted);
    else
        uudebugf("scan_char '\\%03o'", (unsigned)want);
#endif
    if (isspace(want) && isspace((unsigned char)*lp)) {
        ++lp;
        if (res)
            *(char *)res = wanted;
        return success(lp);
    }
    lp = skipspace(lp);
    if (*lp == wanted) {
        if (*lp) // don't incr past null char
            ++lp;
        if (res)
            *(char *)res = wanted;
        uu.ch = wanted;
        return success(lp);
    }
    return fail(lp);
}
// scan for an app-defined terminal index x
//   x    terminal id (one of the UUTERMINALS enum constants)
//   lp   current scan position; leading whitespace is skipped first
//   res  optional result pointer, passed through untouched to the scanner
// Before the scanner runs, uu.lpstart and uu.lpfail are set to the first
// non-space character and uu.failmsg / uu.len are reset, so a scanner that
// simply returns false leaves a sensible failure position behind.
// Returns whatever the terminal's scanner returns.
//
// Any int expression given to accept()/expect() ends up here - including a
// character constant that was not wrapped in CHAR() - so x is validated
// against the dispatch table instead of being used as an index blindly.
// An id outside the table, or a slot without a scanner, is reported as an
// ordinary failed scan at lp.
static inline bool
__scan_term(int x, char *lp, void *res)
{
    lp = skipspace(lp);
    uu.lpfail = uu.lpstart = lp;
    uu.failmsg = NULL;
    uu.len = 0;
    if (x < 0 || (size_t)x >= sizeof uuterms / sizeof uuterms[0]
            || uuterms[x].fn == NULL)
        return false; // uu.lpfail/uu.failmsg already describe the failure
#if UUDEBUG
    bool ret = (uuterms[x].fn)(lp, res);
    uudebugf("scan_term %s: %s\n", uuterms[x].name, ret? "success" : "fail");
    return ret;
#else
    return (uuterms[x].fn)(lp, res);
#endif
}
// scan for literal text
//   wanted  the text to match
//   lp      current scan position
//   res     optional (may be NULL) pointer to a char * that receives the
//           start of the matched text within the input
// Returns success(lp) with uu.lp advanced past the match and uu.len set to
// its length, or fail(lp) with uu.lpfail at the point where matching began.
//
// Leading whitespace in the input is skipped unless wanted itself starts with
// whitespace. A literal ending in a letter does not match when the input
// continues with another letter, and likewise for digits, so "if" does not
// match the front of "iffy" and "12" does not match the front of "123".
// An empty literal matches anywhere; there is no last character to apply the
// boundary rule to, so that rule is skipped for it.
static bool
__scan_literal(const char *wanted, char *lp, void *res)
{
    uudebugf("scan_literal \"%s\"", wanted);
    if (*lp == '\0')
        return *wanted=='\0'? success(lp) : fail(lp); // allows for accept("")
    // <ctype.h> classifiers need unsigned char values (negative is undefined)
    if (!isspace((unsigned char)*wanted)) // if not looking for space, skip over it
        lp = skipspace(lp);
    const size_t l = strlen(wanted);
    uu.lpstart = lp;
    // strncmp stops at the input's '\0', so input shorter than the literal
    // simply compares unequal; no separate length check is needed
    if (strncmp(wanted, lp, l) != 0)
        return fail(lp);
    if (l > 0) {
        // lp[l] is at worst the terminating '\0' because l characters matched
        const unsigned char last = (unsigned char)wanted[l-1];
        const unsigned char next = (unsigned char)lp[l];
        if ((isalpha(last) && isalpha(next)) || (isdigit(last) && isdigit(next)))
            return fail(lp);
    }
    lp += l;
    uu.len = (int)l;
    if (res) {
        // this will be cause of core dumps if res not a ptr to ptr
        char **cp = (char **)res;
        *cp = uu.lpstart;
    }
    return success(lp);
}
// Placeholders named by the `default:` arm of the _Generic selections in
// __accept (three arguments) and _expect_msg (two arguments).
// They are not meant to be called: they only give the selection something to
// resolve to when accept()/expect() is handed an unsupported type, and both
// do nothing and return nothing.
static void __unknown3(void *x, void *lp, void *res) { (void)x; (void)lp; (void)res; }
static void __unknown2(void *x, void *msg) { (void)x; (void)msg; }
// literal, char, and user-term messages for failed expect()
//
// _msg_str: message for a failed expect() of literal text s.
//   msg == NULL  ->  expected "<s>" at pos N
//   msg != NULL  ->  <msg> at pos N
// N is uuerrorpos(); the text is written to uu.msg.
// Both parameters are const-qualified: neither string is modified, and
// _expect_msg routes const char * literals here.
// NOTE(review): the write is unbounded, while the default buffer (_uumsgbuf)
// holds 80 bytes; s and msg have to be short enough to fit.
static void
_msg_str(const char *s, const char *msg)
{
    if (msg != NULL)
        sprintf(uu.msg, "%s at pos %d", msg, uuerrorpos());
    else
        sprintf(uu.msg, "expected \"%s\" at pos %d", s, uuerrorpos());
}
// _msg_char: message for a failed expect() of the single character c.
//   msg != NULL          ->  <msg> at pos N
//   c printable          ->  expected '<c>' at pos N
//   c not printable      ->  expected '\ooo' at pos N   (three octal digits)
// N is uuerrorpos(); the text is written to uu.msg.
// c is converted to unsigned char before isprint() (undefined for negative
// values) and before %o, so a byte >= 0x80 is shown as its three-digit octal
// value rather than as a sign-extended number.
// NOTE(review): the write is unbounded, while the default buffer (_uumsgbuf)
// holds 80 bytes; msg has to be short enough to fit.
static void
_msg_char(char c, const char *msg)
{
    if (msg != NULL) {
        sprintf(uu.msg, "%s at pos %d", msg, uuerrorpos());
        return;
    }
    const unsigned char uc = (unsigned char)c;
    if (isprint(uc))
        sprintf(uu.msg, "expected '%c' at pos %d", c, uuerrorpos());
    else
        sprintf(uu.msg, "expected '\\%03o' at pos %d", (unsigned)uc, uuerrorpos());
}
// _msg_term: message for a failed expect() of app-defined terminal t.
//   msg == NULL  ->  expected <terminal name> at pos N
//   msg != NULL  ->  <msg> at pos N
// If the scanner left an explanation in uu.failmsg it is appended in
// parentheses. N is uuerrorpos(); the text is written to uu.msg.
// t is validated before it is used to look up the terminal name, because any
// int expression passed to expect() is routed here; an id outside the table
// is reported with the generic word "terminal" instead of reading past it.
// NOTE(review): sprintf/strcat are unbounded, while the default buffer
// (_uumsgbuf) holds 80 bytes; msg, the terminal name and uu.failmsg together
// have to be short enough to fit.
static void
_msg_term(int t, const char *msg)
{
    if (msg != NULL) {
        sprintf(uu.msg, "%s at pos %d", msg, uuerrorpos());
    } else {
        const bool known = t >= 0
            && (size_t)t < sizeof uuterms / sizeof uuterms[0]
            && uuterms[t].name != NULL;
        sprintf(uu.msg, "expected %s at pos %d",
            known? uuterms[t].name : "terminal", uuerrorpos());
    }
    if (uu.failmsg) {
        strcat(uu.msg, " (");
        strcat(uu.msg, uu.failmsg);
        strcat(uu.msg, ")");
    }
}
// accept('x') -- a char constant is promoted to int and would select
// __scan_term in _Generic, so casting to char is required for char literals:
// accept((char)'x'), or use convenience macros:
// (the argument and the whole expansion are parenthesized so that an
// expression argument such as CHAR('a' + 1) is converted as a whole and the
// result still has type char)
#define CHAR(x) ((char)(x))
// e.g.
// #define EQ CHAR('=')
// ...
// accept(CHAR('*'));
// expect(EQ);