From c2f10106d82ec2a8027f8f04b8d14cd454e01304 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 07:14:55 +0000 Subject: [PATCH 1/6] json: Optimize escaping string in Encoder --- Modules/_json.c | 148 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 115 insertions(+), 33 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..6819043feb1f5d 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -51,7 +51,7 @@ typedef struct _PyEncoderObject { char sort_keys; char skipkeys; int allow_nan; - PyCFunction fast_encode; + int (*fast_encode)(PyUnicodeWriter *, PyObject*); } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -102,8 +102,8 @@ static PyObject * _encoded_const(PyObject *obj); static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); +static int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj); static PyObject * encoder_encode_float(PyEncoderObject *s, PyObject *obj); @@ -303,6 +303,89 @@ escape_unicode(PyObject *pystr) return rval; } +// Take a PyUnicode pystr and write an ASCII-only escaped string to writer. 
+static int +write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t chars; + Py_ssize_t copy_len = 0; + const void *input; + int kind; + int ret; + unsigned char buf[12]; + + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); + if (S_CHAR(c)) { + copy_len++; + } + else { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + copy_len = 0; + + chars = ascii_escape_unichar(c, buf, 0); + ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (ret) return ret; + } + } + + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + + return PyUnicodeWriter_WriteChar(writer, '"'); +} + +// Take a PyUnicode pystr and write an escaped string to writer. 
+static int +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t chars; + Py_ssize_t copy_len = 0; + const void *input; + int kind; + int ret; + unsigned char buf[12]; + + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); + if (c <= 0x1f || c == '\\' || c == '"') { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + copy_len = 0; + + chars = ascii_escape_unichar(c, buf, 0); + ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (ret) return ret; + } + else { + copy_len++; + } + } + + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); + if (ret) return ret; + return PyUnicodeWriter_WriteChar(writer, '"'); +} + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { @@ -1255,8 +1338,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); - if (f == py_encode_basestring_ascii || f == py_encode_basestring) { - s->fast_encode = f; + if (f == py_encode_basestring_ascii){ + s->fast_encode = write_escaped_ascii; + } + else if (f == py_encode_basestring) { + s->fast_encode = write_escaped_unicode; } } @@ -1437,33 +1523,35 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj) return PyFloat_Type.tp_repr(obj); } -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) +static int +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { - /* Return the JSON representation of a string */ - PyObject *encoded; + /* Append stolen and then decrement its reference count */ + int rval = PyUnicodeWriter_WriteStr(writer, stolen); + Py_DECREF(stolen); + return rval; +} +static 
int +encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) +{ if (s->fast_encode) { - return s->fast_encode(NULL, obj); + return s->fast_encode(writer, obj); + } + + /* Return the JSON representation of a string */ + PyObject *encoded = PyObject_CallOneArg(s->encoder, obj); + if (encoded == NULL) { + return -1; } - encoded = PyObject_CallOneArg(s->encoder, obj); if (encoded != NULL && !PyUnicode_Check(encoded)) { PyErr_Format(PyExc_TypeError, "encoder() must return a string, not %.80s", Py_TYPE(encoded)->tp_name); Py_DECREF(encoded); - return NULL; + return -1; } - return encoded; -} - -static int -_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyUnicodeWriter_WriteStr(writer, stolen); - Py_DECREF(stolen); - return rval; + return _steal_accumulate(writer, encoded); } static int @@ -1485,10 +1573,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_accumulate(writer, encoded); + return encoder_write_string(s, writer, obj); } else if (PyLong_Check(obj)) { if (PyLong_CheckExact(obj)) { @@ -1577,7 +1662,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs PyObject *item_separator) { PyObject *keystr = NULL; - PyObject *encoded; + int rv; if (PyUnicode_Check(key)) { keystr = Py_NewRef(key); @@ -1617,15 +1702,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs } } - encoded = encoder_encode_string(s, keystr); + rv = encoder_write_string(s, writer, keystr); Py_DECREF(keystr); - if (encoded == NULL) { - return -1; + if (rv != 0) { + return rv; } - if (_steal_accumulate(writer, encoded) < 0) { - return -1; - } if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; 
} From 59e5131a5e2efb25bb0239878acedc50b01d918f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 07:27:34 +0000 Subject: [PATCH 2/6] add news and whatsnew --- Doc/whatsnew/3.14.rst | 7 +++++++ .../Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9e6b69fbc05273..7b4a95a9f4fad8 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1518,6 +1518,13 @@ io :gh:`120754` and :gh:`90102`.) +json +---- + +* Improve the performance of :class:`~json.JSONEncoder` when encoding strings. + (Contributed by Inada Naoki in :gh:`133186`.) + + uuid ---- diff --git a/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst new file mode 100644 index 00000000000000..1987d06c27efb3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-30-07-27-30.gh-issue-133186.MSMobf.rst @@ -0,0 +1,2 @@ +Improve the performance of :class:`~json.JSONEncoder` when encoding strings. +(Contributed by Inada Naoki in :gh:`133186`.) From ee1a7f693b74db1bbeeaa39ae13f4bf2f1c1191b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 20:38:03 +0900 Subject: [PATCH 3/6] add comment --- Modules/_json.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_json.c b/Modules/_json.c index 6819043feb1f5d..23ec53c19a2698 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -304,6 +304,8 @@ escape_unicode(PyObject *pystr) } // Take a PyUnicode pystr and write an ASCII-only escaped string to writer. +// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of +// return Unicode object. 
static int write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { @@ -346,6 +348,8 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) } // Take a PyUnicode pystr and write an escaped string to writer. +// Same to escape_unicode(), but write to PyUnicodeWriter instead of +// return Unicode object. static int write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) { From d026be336b7abdf9be4eb4c52ed18abbdb857278 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 30 Apr 2025 22:02:01 +0900 Subject: [PATCH 4/6] apply suggested change --- Modules/_json.c | 69 ++++++++++--------------------------------------- 1 file changed, 13 insertions(+), 56 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 23ec53c19a2698..b543a764414a50 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -48,10 +48,11 @@ typedef struct _PyEncoderObject { PyObject *indent; PyObject *key_separator; PyObject *item_separator; - char sort_keys; - char skipkeys; - int allow_nan; - int (*fast_encode)(PyUnicodeWriter *, PyObject*); + bool sort_keys; + bool skipkeys; + bool allow_nan; + bool fast_encode; + bool ensure_ascii; /* used only when fast_encode == true */ } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -303,55 +304,9 @@ escape_unicode(PyObject *pystr) return rval; } -// Take a PyUnicode pystr and write an ASCII-only escaped string to writer. -// Same to ascii_escape_unicode(), but write to PyUnicodeWriter instead of -// return Unicode object. 
-static int -write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) -{ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t chars; - Py_ssize_t copy_len = 0; - const void *input; - int kind; - int ret; - unsigned char buf[12]; - - input_chars = PyUnicode_GET_LENGTH(pystr); - input = PyUnicode_DATA(pystr); - kind = PyUnicode_KIND(pystr); - - ret = PyUnicodeWriter_WriteChar(writer, '"'); - if (ret) return ret; - - for (i = 0; i < input_chars; i++) { - Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (S_CHAR(c)) { - copy_len++; - } - else { - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - copy_len = 0; - - chars = ascii_escape_unichar(c, buf, 0); - ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); - if (ret) return ret; - } - } - - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - - return PyUnicodeWriter_WriteChar(writer, '"'); -} - // Take a PyUnicode pystr and write an escaped string to writer. -// Same to escape_unicode(), but write to PyUnicodeWriter instead of -// return Unicode object. 
static int -write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) { Py_ssize_t i; Py_ssize_t input_chars; @@ -371,7 +326,7 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) for (i = 0; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (c <= 0x1f || c == '\\' || c == '"') { + if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) { ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); if (ret) return ret; copy_len = 0; @@ -1338,15 +1293,17 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->allow_nan = allow_nan; - s->fast_encode = NULL; + s->fast_encode = false; + s->ensure_ascii = false; if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); if (f == py_encode_basestring_ascii){ - s->fast_encode = write_escaped_ascii; + s->fast_encode = true; + s->ensure_ascii = true; } else if (f == py_encode_basestring) { - s->fast_encode = write_escaped_unicode; + s->fast_encode = true; } } @@ -1540,7 +1497,7 @@ static int encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) { if (s->fast_encode) { - return s->fast_encode(writer, obj); + return write_escaped_unicode(writer, obj, s->ensure_ascii); } /* Return the JSON representation of a string */ From 8e5e00b4eb43fbecb31dce2485c36684a86592bc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 1 May 2025 10:33:00 +0900 Subject: [PATCH 5/6] use tmp buffer --- Modules/_json.c | 120 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 23 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index b543a764414a50..d6628fc2e871d6 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -48,11 +48,10 @@ typedef struct _PyEncoderObject { PyObject *indent; PyObject *key_separator; PyObject *item_separator; + int 
(*fast_encode)(PyUnicodeWriter *, PyObject *); bool sort_keys; bool skipkeys; bool allow_nan; - bool fast_encode; - bool ensure_ascii; /* used only when fast_encode == true */ } PyEncoderObject; #define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op)) @@ -304,18 +303,20 @@ escape_unicode(PyObject *pystr) return rval; } -// Take a PyUnicode pystr and write an escaped string to writer. +#define ESCAPE_BUF_SIZE 200 + +// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii) static int -write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) +write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { Py_ssize_t i; Py_ssize_t input_chars; - Py_ssize_t chars; - Py_ssize_t copy_len = 0; + Py_ssize_t buf_len; const void *input; + Py_UCS4 c = 0; int kind; int ret; - unsigned char buf[12]; + char buf[ESCAPE_BUF_SIZE]; // avoid overhead of PyUnicodeWriter APIs input_chars = PyUnicode_GET_LENGTH(pystr); input = PyUnicode_DATA(pystr); @@ -324,27 +325,102 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr, bool ascii_only) ret = PyUnicodeWriter_WriteChar(writer, '"'); if (ret) return ret; + // Fast path for string doesn't need escape at all: e.g. 
"id", "name" for (i = 0; i < input_chars; i++) { + c = PyUnicode_READ(kind, input, i); + if (!S_CHAR(c)) { + break; + } + } + if (i > 0) { + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i); + if (ret) return ret; + } + if (i == input_chars) { + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + buf_len = ascii_escape_unichar(c, (unsigned char*)buf, 0); + + for (i++ ; i < input_chars; i++) { Py_UCS4 c = PyUnicode_READ(kind, input, i); - if (c <= 0x1f || c == '\\' || c == '"' || (ascii_only && c >= 0x7f)) { - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - copy_len = 0; + if (S_CHAR(c)) { + buf[buf_len++] = c; + } + else { + buf_len = ascii_escape_unichar(c, (unsigned char*)buf, buf_len); + } - chars = ascii_escape_unichar(c, buf, 0); - ret = PyUnicodeWriter_WriteUTF8(writer, (const char*)buf, chars); + if (buf_len + 12 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); if (ret) return ret; + buf_len = 0; + } + } + + assert(buf_len < ESCAPE_BUF_SIZE); + buf[buf_len++] = '"'; + return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); +} + +static int +write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) +{ + Py_ssize_t i; + Py_ssize_t input_size; + Py_ssize_t buf_len; + const unsigned char *input; + int ret; + unsigned char c = 0; + char buf[ESCAPE_BUF_SIZE]; + + // We don't need to escape non-ASCII chars. + // So we just copy UTF-8 from pystr to buf. + input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size); + + ret = PyUnicodeWriter_WriteChar(writer, '"'); + if (ret) return ret; + + // Fast path for string doesn't need escape at all: e.g. 
"id", "name" + for (i = 0; i < input_size; i++) { + c = input[i]; + if (c <= 0x1f || c == '\\' || c == '"') { + break; + } + } + if (i > 0) { + ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i); + if (ret) return ret; + } + if (i == input_size) { + return PyUnicodeWriter_WriteChar(writer, '"'); + } + + buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0); + + for (i++; i < input_size; i++) { + c = input[i]; + if (c <= 0x1f || c == '\\' || c == '"') { + buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len); } else { - copy_len++; + buf[buf_len++] = c; + } + + if (buf_len + 6 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + if (ret) return ret; + buf_len = 0; } } - ret = PyUnicodeWriter_WriteSubstring(writer, pystr, i-copy_len, i); - if (ret) return ret; - return PyUnicodeWriter_WriteChar(writer, '"'); + assert(buf_len < ESCAPE_BUF_SIZE); + buf[buf_len++] = '"'; + return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); } +#undef ESCAPE_BUF_SIZE + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) { @@ -1293,17 +1369,15 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->sort_keys = sort_keys; s->skipkeys = skipkeys; s->allow_nan = allow_nan; - s->fast_encode = false; - s->ensure_ascii = false; + s->fast_encode = NULL; if (PyCFunction_Check(s->encoder)) { PyCFunction f = PyCFunction_GetFunction(s->encoder); if (f == py_encode_basestring_ascii){ - s->fast_encode = true; - s->ensure_ascii = true; + s->fast_encode = write_escaped_ascii; } else if (f == py_encode_basestring) { - s->fast_encode = true; + s->fast_encode = write_escaped_unicode; } } @@ -1497,7 +1571,7 @@ static int encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj) { if (s->fast_encode) { - return write_escaped_unicode(writer, obj, s->ensure_ascii); + return s->fast_encode(writer, obj); } /* Return the JSON representation of a string */ From 
19c0f1fb5747a3f927f5f505966fdb732e75d953 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 1 May 2025 07:05:50 +0000 Subject: [PATCH 6/6] use UCS4 instead of UTF8 --- Modules/_json.c | 71 +++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index d6628fc2e871d6..cd08fa688d3a52 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr) return rval; } -#define ESCAPE_BUF_SIZE 200 - // Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii) static int write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr) { +#define ESCAPE_BUF_SIZE 200 Py_ssize_t i; Py_ssize_t input_chars; Py_ssize_t buf_len; @@ -367,60 +366,74 @@ static int write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr) { Py_ssize_t i; - Py_ssize_t input_size; - Py_ssize_t buf_len; - const unsigned char *input; + Py_ssize_t input_chars; + Py_ssize_t chars = 0; + const void *input; + int kind; int ret; - unsigned char c = 0; - char buf[ESCAPE_BUF_SIZE]; + Py_UCS4 output[ESCAPE_BUF_SIZE]; - // We don't need to escape non-ASCII chars. - // So we just copy UTF-8 from pystr to buf. - input = (const unsigned char*) PyUnicode_AsUTF8AndSize(pystr, &input_size); + input_chars = PyUnicode_GET_LENGTH(pystr); + input = PyUnicode_DATA(pystr); + kind = PyUnicode_KIND(pystr); ret = PyUnicodeWriter_WriteChar(writer, '"'); if (ret) return ret; // Fast path for string doesn't need escape at all: e.g. 
"id", "name" - for (i = 0; i < input_size; i++) { - c = input[i]; + for (i = 0; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); if (c <= 0x1f || c == '\\' || c == '"') { break; } } if (i > 0) { - ret = PyUnicodeWriter_WriteUTF8(writer, (const char *)input, i); + ret = PyUnicodeWriter_WriteSubstring(writer, pystr, 0, i); if (ret) return ret; } - if (i == input_size) { + if (i == input_chars) { return PyUnicodeWriter_WriteChar(writer, '"'); } - buf_len = ascii_escape_unichar(c, (unsigned char *)buf, 0); + for (; i < input_chars; i++) { + Py_UCS4 c = PyUnicode_READ(kind, input, i); - for (i++; i < input_size; i++) { - c = input[i]; - if (c <= 0x1f || c == '\\' || c == '"') { - buf_len = ascii_escape_unichar(c, (unsigned char *)buf, buf_len); - } - else { - buf[buf_len++] = c; + // Same to ENCODE_OUTPUT in escape_unicode + switch (c) { + case '\\': output[chars++] = '\\'; output[chars++] = c; break; + case '"': output[chars++] = '\\'; output[chars++] = c; break; + case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; + case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; + case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; + case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; + case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; + default: + if (c <= 0x1f) { + output[chars++] = '\\'; + output[chars++] = 'u'; + output[chars++] = '0'; + output[chars++] = '0'; + output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; + output[chars++] = Py_hexdigits[(c ) & 0xf]; + } else { + output[chars++] = c; + } } - if (buf_len + 6 > ESCAPE_BUF_SIZE) { - ret = PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + if (chars + 6 > ESCAPE_BUF_SIZE) { + ret = PyUnicodeWriter_WriteUCS4(writer, output, chars); if (ret) return ret; - buf_len = 0; + chars = 0; } } - assert(buf_len < ESCAPE_BUF_SIZE); - buf[buf_len++] = '"'; - return PyUnicodeWriter_WriteUTF8(writer, buf, buf_len); + assert(chars < ESCAPE_BUF_SIZE); + 
output[chars++] = '"'; + return PyUnicodeWriter_WriteUCS4(writer, output, chars); } - #undef ESCAPE_BUF_SIZE + static void raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end) {