mirror of https://github.com/python/cpython.git
Issue #11303: Added shortcuts for utf8 and latin1 encodings.
Documented the list of optimized encodings as a CPython implementation detail.
This commit is contained in:
parent
eea22d2d66
commit
1d52146a25
|
@ -904,6 +904,15 @@ is meant to be exhaustive. Notice that spelling alternatives that only differ in
|
||||||
case or use a hyphen instead of an underscore are also valid aliases; therefore,
|
case or use a hyphen instead of an underscore are also valid aliases; therefore,
|
||||||
e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec.
|
e.g. ``'utf-8'`` is a valid alias for the ``'utf_8'`` codec.
|
||||||
|
|
||||||
|
.. impl-detail::
|
||||||
|
|
||||||
|
Some common encodings can bypass the codecs lookup machinery to
|
||||||
|
improve performance. These optimization opportunities are only
|
||||||
|
recognized by CPython for a limited set of aliases: utf-8, utf8,
|
||||||
|
latin-1, latin1, iso-8859-1, mbcs (Windows only), ascii, utf-16,
|
||||||
|
and utf-32. Using alternative spellings for these encodings may
|
||||||
|
result in slower execution.
|
||||||
|
|
||||||
Many of the character sets support the same languages. They vary in individual
|
Many of the character sets support the same languages. They vary in individual
|
||||||
characters (e.g. whether the EURO SIGN is supported or not), and in the
|
characters (e.g. whether the EURO SIGN is supported or not), and in the
|
||||||
assignment of characters to code positions. For the European languages in
|
assignment of characters to code positions. For the European languages in
|
||||||
|
|
|
@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
|
||||||
char lower[11]; /* Enough for any encoding shortcut */
|
char lower[11]; /* Enough for any encoding shortcut */
|
||||||
|
|
||||||
if (encoding == NULL)
|
if (encoding == NULL)
|
||||||
encoding = PyUnicode_GetDefaultEncoding();
|
return PyUnicode_DecodeUTF8(s, size, errors);
|
||||||
|
|
||||||
/* Shortcuts for common default encodings */
|
/* Shortcuts for common default encodings */
|
||||||
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
||||||
if (strcmp(lower, "utf-8") == 0)
|
if ((strcmp(lower, "utf-8") == 0) ||
|
||||||
|
(strcmp(lower, "utf8") == 0))
|
||||||
return PyUnicode_DecodeUTF8(s, size, errors);
|
return PyUnicode_DecodeUTF8(s, size, errors);
|
||||||
else if ((strcmp(lower, "latin-1") == 0) ||
|
else if ((strcmp(lower, "latin-1") == 0) ||
|
||||||
|
(strcmp(lower, "latin1") == 0) ||
|
||||||
(strcmp(lower, "iso-8859-1") == 0))
|
(strcmp(lower, "iso-8859-1") == 0))
|
||||||
return PyUnicode_DecodeLatin1(s, size, errors);
|
return PyUnicode_DecodeLatin1(s, size, errors);
|
||||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||||
|
@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encoding == NULL)
|
if (encoding == NULL)
|
||||||
encoding = PyUnicode_GetDefaultEncoding();
|
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||||
|
PyUnicode_GET_SIZE(unicode),
|
||||||
|
errors);
|
||||||
|
|
||||||
/* Shortcuts for common default encodings */
|
/* Shortcuts for common default encodings */
|
||||||
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
||||||
if (strcmp(lower, "utf-8") == 0)
|
if ((strcmp(lower, "utf-8") == 0) ||
|
||||||
|
(strcmp(lower, "utf8") == 0))
|
||||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||||
PyUnicode_GET_SIZE(unicode),
|
PyUnicode_GET_SIZE(unicode),
|
||||||
errors);
|
errors);
|
||||||
else if ((strcmp(lower, "latin-1") == 0) ||
|
else if ((strcmp(lower, "latin-1") == 0) ||
|
||||||
|
(strcmp(lower, "latin1") == 0) ||
|
||||||
(strcmp(lower, "iso-8859-1") == 0))
|
(strcmp(lower, "iso-8859-1") == 0))
|
||||||
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
|
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
|
||||||
PyUnicode_GET_SIZE(unicode),
|
PyUnicode_GET_SIZE(unicode),
|
||||||
|
|
Loading…
Reference in New Issue