mirror of https://github.com/python/cpython.git
Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.
Patch by Xiang Zhang.
This commit is contained in:
parent
b7d14a09c2
commit
998c9cdd42
|
@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||||
Py_ssize_t size,
|
Py_ssize_t size,
|
||||||
const char *errors)
|
const char *errors)
|
||||||
{
|
{
|
||||||
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
|
Py_ssize_t i; /* index into data of next input character */
|
||||||
|
|
||||||
Py_ssize_t i; /* index into s of next input byte */
|
|
||||||
char *p; /* next free byte in output buffer */
|
char *p; /* next free byte in output buffer */
|
||||||
#if STRINGLIB_SIZEOF_CHAR > 1
|
#if STRINGLIB_SIZEOF_CHAR > 1
|
||||||
PyObject *error_handler_obj = NULL;
|
PyObject *error_handler_obj = NULL;
|
||||||
|
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
/* subtract preallocated bytes */
|
/* subtract preallocated bytes */
|
||||||
writer.min_size -= max_char_size;
|
writer.min_size -= max_char_size * (newpos - startpos);
|
||||||
|
|
||||||
if (PyBytes_Check(rep)) {
|
if (PyBytes_Check(rep)) {
|
||||||
p = _PyBytesWriter_WriteBytes(&writer, p,
|
p = _PyBytesWriter_WriteBytes(&writer, p,
|
||||||
|
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
if (!PyUnicode_IS_ASCII(rep)) {
|
if (!PyUnicode_IS_ASCII(rep)) {
|
||||||
raise_encode_exception(&exc, "utf-8",
|
raise_encode_exception(&exc, "utf-8", unicode,
|
||||||
unicode,
|
startpos, endpos,
|
||||||
i-1, i,
|
|
||||||
"surrogates not allowed");
|
"surrogates not allowed");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
|
||||||
p = _PyBytesWriter_WriteBytes(&writer, p,
|
p = _PyBytesWriter_WriteBytes(&writer, p,
|
||||||
PyUnicode_DATA(rep),
|
PyUnicode_DATA(rep),
|
||||||
PyUnicode_GET_LENGTH(rep));
|
PyUnicode_GET_LENGTH(rep));
|
||||||
|
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
|
||||||
_PyBytesWriter_Dealloc(&writer);
|
_PyBytesWriter_Dealloc(&writer);
|
||||||
return NULL;
|
return NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#undef MAX_SHORT_UNICHARS
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The pattern for constructing UCS2-repeated masks. */
|
/* The pattern for constructing UCS2-repeated masks. */
|
||||||
|
|
Loading…
Reference in New Issue