Skip to content

Commit b829b5c

Browse files
committed
Implement mp_parse_node_free; print properly repr(string).
1 parent e0722ee commit b829b5c

File tree

15 files changed

+115
-62
lines changed

15 files changed

+115
-62
lines changed

py/builtineval.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) {
2424
const byte *str = mp_obj_str_get_data(o_in, &str_len);
2525

2626
// create the lexer
27-
mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0);
27+
mp_lexer_t *lex = mp_lexer_new_from_str_len(MP_QSTR__lt_string_gt_, (const char*)str, str_len, 0);
28+
qstr source_name = mp_lexer_source_name(lex);
2829

2930
// parse the string
3031
qstr parse_exc_id;
3132
const char *parse_exc_msg;
3233
mp_parse_node_t pn = mp_parse(lex, MP_PARSE_EVAL_INPUT, &parse_exc_id, &parse_exc_msg);
33-
qstr source_name = mp_lexer_source_name(lex);
3434
mp_lexer_free(lex);
3535

3636
if (pn == MP_PARSE_NODE_NULL) {
@@ -40,6 +40,7 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) {
4040

4141
// compile the string
4242
mp_obj_t module_fun = mp_compile(pn, source_name, false);
43+
mp_parse_node_free(pn);
4344

4445
if (module_fun == mp_const_none) {
4546
// TODO handle compile error correctly

py/builtinimport.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
2929
}
3030
*/
3131

32-
uint mod_name_l;
33-
const byte *mod_name_s = mp_obj_str_get_data(args[0], &mod_name_l);
34-
qstr mod_name = qstr_from_strn((const char*)mod_name_s, mod_name_l);
32+
qstr mod_name = mp_obj_str_get_qstr(args[0]);
3533

3634
mp_obj_t loaded = mp_obj_module_get(mod_name);
3735
if (loaded != MP_OBJ_NULL) {
@@ -44,6 +42,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
4442
// TODO handle lexer error correctly
4543
return mp_const_none;
4644
}
45+
qstr source_name = mp_lexer_source_name(lex);
4746

4847
// create a new module object
4948
mp_obj_t module_obj = mp_obj_new_module(mod_name);
@@ -60,7 +59,6 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
6059
qstr parse_exc_id;
6160
const char *parse_exc_msg;
6261
mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
63-
qstr source_name = mp_lexer_source_name(lex);
6462
mp_lexer_free(lex);
6563

6664
if (pn == MP_PARSE_NODE_NULL) {
@@ -72,6 +70,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
7270

7371
// compile the imported script
7472
mp_obj_t module_fun = mp_compile(pn, source_name, false);
73+
mp_parse_node_free(pn);
7574

7675
if (module_fun == mp_const_none) {
7776
// TODO handle compile error correctly

py/emitcpy.c

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -192,29 +192,26 @@ static void print_quoted_str(qstr qstr, bool bytes) {
192192
if (bytes) {
193193
printf("b");
194194
}
195-
bool quote_single = false;
195+
int quote_char = '\'';
196196
if (has_single_quote && !has_double_quote) {
197-
printf("\"");
198-
} else {
199-
quote_single = true;
200-
printf("'");
197+
quote_char = '"';
201198
}
202-
for (int i = 0; i < len; i++) {
203-
if (str[i] == '\n') {
204-
printf("\\n");
205-
} else if (str[i] == '\\') {
199+
printf("%c", quote_char);
200+
for (const char *s = str, *top = str + len; s < top; s++) {
201+
if (*s == quote_char) {
202+
printf("\\%c", quote_char);
203+
} else if (*s == '\\') {
206204
printf("\\\\");
207-
} else if (str[i] == '\'' && quote_single) {
208-
printf("\\'");
205+
} else if (32 <= *s && *s <= 126) {
206+
printf("%c", *s);
207+
} else if (*s == '\n') {
208+
printf("\\n");
209+
// TODO add more escape codes here
209210
} else {
210-
printf("%c", str[i]);
211+
printf("\\x%02x", (*s) & 0xff);
211212
}
212213
}
213-
if (has_single_quote && !has_double_quote) {
214-
printf("\"");
215-
} else {
216-
printf("'");
217-
}
214+
printf("%c", quote_char);
218215
}
219216

220217
static void emit_cpy_load_const_str(emit_t *emit, qstr qstr, bool bytes) {

py/lexer.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,8 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
493493
}
494494
c = num;
495495
} else {
496-
// TODO error message
497-
assert(0);
496+
// unrecognised escape character; CPython lets this through verbatim as '\' and then the character
497+
vstr_add_char(&lex->vstr, '\\');
498498
}
499499
break;
500500
}
@@ -644,10 +644,10 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
644644
}
645645
}
646646

647-
mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
647+
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
648648
mp_lexer_t *lex = m_new(mp_lexer_t, 1);
649649

650-
lex->source_name = qstr_from_str(src_name);
650+
lex->source_name = src_name;
651651
lex->stream_data = stream_data;
652652
lex->stream_next_char = stream_next_char;
653653
lex->stream_close = stream_close;

py/lexer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,8 +124,8 @@ typedef struct _mp_lexer_t mp_lexer_t;
124124

125125
void mp_token_show(const mp_token_t *tok);
126126

127-
mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
128-
mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len);
127+
mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
128+
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len);
129129

130130
void mp_lexer_free(mp_lexer_t *lex);
131131
qstr mp_lexer_source_name(mp_lexer_t *lex);

py/lexerstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ static void str_buf_free(mp_lexer_str_buf_t *sb) {
2828
m_del_obj(mp_lexer_str_buf_t, sb);
2929
}
3030

31-
mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len) {
31+
mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len) {
3232
mp_lexer_str_buf_t *sb = m_new_obj(mp_lexer_str_buf_t);
3333
sb->free_len = free_len;
3434
sb->src_beg = str;

py/lexerunix.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
2828
return NULL;
2929
}
3030

31-
return mp_lexer_new_from_str_len(filename, data, size, size);
31+
return mp_lexer_new_from_str_len(qstr_from_str(filename), data, size, size);
3232
}
3333

3434
/******************************************************************************/

py/obj.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in);
287287
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
288288
uint mp_obj_str_get_hash(mp_obj_t self_in);
289289
uint mp_obj_str_get_len(mp_obj_t self_in);
290+
qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr
290291
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
291292
const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
292293

py/objstr.c

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,39 @@ void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj
4040
if (kind == PRINT_STR && !is_bytes) {
4141
print(env, "%.*s", str_len, str_data);
4242
} else {
43+
// this escapes characters, but it will be very slow to print (calling print many times)
44+
bool has_single_quote = false;
45+
bool has_double_quote = false;
46+
for (const byte *s = str_data, *top = str_data + str_len; (!has_single_quote || !has_double_quote) && s < top; s++) {
47+
if (*s == '\'') {
48+
has_single_quote = true;
49+
} else if (*s == '"') {
50+
has_double_quote = true;
51+
}
52+
}
4353
if (is_bytes) {
4454
print(env, "b");
4555
}
46-
// TODO need to escape chars etc
47-
print(env, "'%.*s'", str_len, str_data);
56+
int quote_char = '\'';
57+
if (has_single_quote && !has_double_quote) {
58+
quote_char = '"';
59+
}
60+
print(env, "%c", quote_char);
61+
for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) {
62+
if (*s == quote_char) {
63+
print(env, "\\%c", quote_char);
64+
} else if (*s == '\\') {
65+
print(env, "\\\\");
66+
} else if (32 <= *s && *s <= 126) {
67+
print(env, "%c", *s);
68+
} else if (*s == '\n') {
69+
print(env, "\\n");
70+
// TODO add more escape codes here if we want to match CPython
71+
} else {
72+
print(env, "\\x%02x", *s);
73+
}
74+
}
75+
print(env, "%c", quote_char);
4876
}
4977
}
5078

@@ -474,13 +502,17 @@ bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
474502
}
475503
}
476504

505+
void bad_implicit_conversion(mp_obj_t self_in) __attribute__((noreturn));
506+
void bad_implicit_conversion(mp_obj_t self_in) {
507+
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in)));
508+
}
509+
477510
uint mp_obj_str_get_hash(mp_obj_t self_in) {
478511
if (MP_OBJ_IS_STR(self_in)) {
479512
GET_STR_HASH(self_in, h);
480513
return h;
481514
} else {
482-
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
483-
mp_obj_get_type_str(self_in)));
515+
bad_implicit_conversion(self_in);
484516
}
485517
}
486518

@@ -489,8 +521,20 @@ uint mp_obj_str_get_len(mp_obj_t self_in) {
489521
GET_STR_LEN(self_in, l);
490522
return l;
491523
} else {
492-
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
493-
mp_obj_get_type_str(self_in)));
524+
bad_implicit_conversion(self_in);
525+
}
526+
}
527+
528+
// use this if you will anyway convert the string to a qstr
529+
// will be more efficient for the case where it's already a qstr
530+
qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
531+
if (MP_OBJ_IS_QSTR(self_in)) {
532+
return MP_OBJ_QSTR_VALUE(self_in);
533+
} else if (MP_OBJ_IS_TYPE(self_in, &str_type)) {
534+
mp_obj_str_t *self = self_in;
535+
return qstr_from_strn((char*)self->data, self->len);
536+
} else {
537+
bad_implicit_conversion(self_in);
494538
}
495539
}
496540

@@ -502,8 +546,7 @@ const char *mp_obj_str_get_str(mp_obj_t self_in) {
502546
(void)l; // len unused
503547
return (const char*)s;
504548
} else {
505-
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
506-
mp_obj_get_type_str(self_in)));
549+
bad_implicit_conversion(self_in);
507550
}
508551
}
509552

@@ -513,8 +556,7 @@ const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len) {
513556
*len = l;
514557
return s;
515558
} else {
516-
nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
517-
mp_obj_get_type_str(self_in)));
559+
bad_implicit_conversion(self_in);
518560
}
519561
}
520562

py/parse.c

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
#define RULE_ARG_OPT_TOK (0x3000)
2727
#define RULE_ARG_OPT_RULE (0x4000)
2828

29+
#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond)
30+
2931
// (un)comment to use rule names; for debugging
3032
//#define USE_RULE_NAME (1)
3133

@@ -135,15 +137,23 @@ mp_parse_node_struct_t *parse_node_new_struct(int src_line, int rule_id, int num
135137
return pn;
136138
}
137139

138-
int parse_node_free_struct(mp_parse_node_t pn_in) {
139-
int cnt = 0;
140-
if (MP_PARSE_NODE_IS_STRUCT(pn_in)) {
141-
mp_parse_node_struct_t *pn = (mp_parse_node_struct_t *)pn_in;
142-
int n = pn->kind_num_nodes >> 8;
143-
for (int i = 0; i < n; i++) {
144-
cnt += parse_node_free_struct(pn->nodes[i]);
140+
uint mp_parse_node_free(mp_parse_node_t pn) {
141+
uint cnt = 0;
142+
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
143+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
144+
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
145+
uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
146+
bool adjust = ADD_BLANK_NODE(rule_id);
147+
if (adjust) {
148+
n--;
149+
}
150+
for (uint i = 0; i < n; i++) {
151+
cnt += mp_parse_node_free(pns->nodes[i]);
152+
}
153+
if (adjust) {
154+
n++;
145155
}
146-
m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pn);
156+
m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
147157
cnt++;
148158
}
149159
return cnt;
@@ -174,15 +184,15 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
174184
default: assert(0);
175185
}
176186
} else {
177-
mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t*)pn;
178-
int n = pns2->kind_num_nodes >> 8;
187+
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
188+
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
179189
#ifdef USE_RULE_NAME
180-
printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns2), n);
190+
printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
181191
#else
182-
printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns2), n);
192+
printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
183193
#endif
184-
for (int i = 0; i < n; i++) {
185-
mp_parse_node_print(pns2->nodes[i], indent + 2);
194+
for (uint i = 0; i < n; i++) {
195+
mp_parse_node_print(pns->nodes[i], indent + 2);
186196
}
187197
}
188198
}
@@ -472,7 +482,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr
472482
}
473483

474484
// always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
475-
if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) {
485+
if (ADD_BLANK_NODE(rule->rule_id)) {
476486
emit_rule = true;
477487
push_result_node(parser, MP_PARSE_NODE_NULL);
478488
i += 1;

py/parse.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ typedef struct _mp_parse_node_struct_t {
5353
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
5454

5555
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
56-
int parse_node_free_struct(mp_parse_node_t pn_in);
56+
uint mp_parse_node_free(mp_parse_node_t pn);
5757

5858
void mp_parse_node_print(mp_parse_node_t pn, int indent);
5959

py/qstrdefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,4 +91,5 @@ Q(<listcomp>)
9191
Q(<dictcomp>)
9292
Q(<setcomp>)
9393
Q(<genexpr>)
94+
Q(<string>)
9495
Q(<stdin>)

stm/lexerfatfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
4949
f_read(&fb->fp, fb->buf, sizeof(fb->buf), &n);
5050
fb->len = n;
5151
fb->pos = 0;
52-
return mp_lexer_new(filename, fb, (mp_lexer_stream_next_char_t)file_buf_next_char, (mp_lexer_stream_close_t)file_buf_close);
52+
return mp_lexer_new(qstr_from_str(filename), fb, (mp_lexer_stream_next_char_t)file_buf_next_char, (mp_lexer_stream_close_t)file_buf_close);
5353
}
5454

5555
/******************************************************************************/

stm/main.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ void do_repl(void) {
378378
}
379379
}
380380

381-
mp_lexer_t *lex = mp_lexer_new_from_str_len("<stdin>", vstr_str(&line), vstr_len(&line), 0);
381+
mp_lexer_t *lex = mp_lexer_new_from_str_len(MP_QSTR__lt_stdin_gt_, vstr_str(&line), vstr_len(&line), 0);
382382
qstr parse_exc_id;
383383
const char *parse_exc_msg;
384384
mp_parse_node_t pn = mp_parse(lex, MP_PARSE_SINGLE_INPUT, &parse_exc_id, &parse_exc_msg);
@@ -393,6 +393,7 @@ void do_repl(void) {
393393
// parse okay
394394
mp_lexer_free(lex);
395395
mp_obj_t module_fun = mp_compile(pn, source_name, true);
396+
mp_parse_node_free(pn);
396397
if (module_fun != mp_const_none) {
397398
nlr_buf_t nlr;
398399
uint32_t start = sys_tick_counter;
@@ -439,6 +440,8 @@ bool do_file(const char *filename) {
439440
mp_lexer_free(lex);
440441

441442
mp_obj_t module_fun = mp_compile(pn, source_name, false);
443+
mp_parse_node_free(pn);
444+
442445
if (module_fun == mp_const_none) {
443446
return false;
444447
}

0 commit comments

Comments
 (0)