mirror of https://github.com/python/cpython.git
Issue #11303: Added shortcuts for utf8 and latin1 encodings.
Documented the list of optimized encodings as a CPython implementation detail.
This commit is contained in:
parent
eea22d2d66
commit
1d52146a25
|
@ -904,6 +904,15 @@ is meant to be exhaustive. Notice that spelling alternatives that only differ in
|
|||
case or use a hyphen instead of an underscore are also valid aliases; therefore,
|
||||
e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec.
|
||||
|
||||
.. impl-detail::
|
||||
|
||||
Some common encodings can bypass the codecs lookup machinery to
|
||||
improve performance. These optimization opportunities are only
|
||||
recognized by CPython for a limited set of aliases: utf-8, utf8,
|
||||
latin-1, latin1, iso-8859-1, mbcs (Windows only), ascii, utf-16,
|
||||
and utf-32. Using alternative spellings for these encodings may
|
||||
result in slower execution.
|
||||
|
||||
Many of the character sets support the same languages. They vary in individual
|
||||
characters (e.g. whether the EURO SIGN is supported or not), and in the
|
||||
assignment of characters to code positions. For the European languages in
|
||||
|
|
|
@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
|
|||
char lower[11]; /* Enough for any encoding shortcut */
|
||||
|
||||
if (encoding == NULL)
|
||||
encoding = PyUnicode_GetDefaultEncoding();
|
||||
return PyUnicode_DecodeUTF8(s, size, errors);
|
||||
|
||||
/* Shortcuts for common default encodings */
|
||||
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
||||
if (strcmp(lower, "utf-8") == 0)
|
||||
if ((strcmp(lower, "utf-8") == 0) ||
|
||||
(strcmp(lower, "utf8") == 0))
|
||||
return PyUnicode_DecodeUTF8(s, size, errors);
|
||||
else if ((strcmp(lower, "latin-1") == 0) ||
|
||||
(strcmp(lower, "latin1") == 0) ||
|
||||
(strcmp(lower, "iso-8859-1") == 0))
|
||||
return PyUnicode_DecodeLatin1(s, size, errors);
|
||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||
|
@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
|
|||
}
|
||||
|
||||
if (encoding == NULL)
|
||||
encoding = PyUnicode_GetDefaultEncoding();
|
||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||
PyUnicode_GET_SIZE(unicode),
|
||||
errors);
|
||||
|
||||
/* Shortcuts for common default encodings */
|
||||
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
||||
if (strcmp(lower, "utf-8") == 0)
|
||||
if ((strcmp(lower, "utf-8") == 0) ||
|
||||
(strcmp(lower, "utf8") == 0))
|
||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||
PyUnicode_GET_SIZE(unicode),
|
||||
errors);
|
||||
else if ((strcmp(lower, "latin-1") == 0) ||
|
||||
(strcmp(lower, "latin1") == 0) ||
|
||||
(strcmp(lower, "iso-8859-1") == 0))
|
||||
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
|
||||
PyUnicode_GET_SIZE(unicode),
|
||||
|
|
Loading…
Reference in New Issue