From b62bdf71ea0cd52041d49691d8ae3dc645bd48e1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 31 Oct 2020 01:32:11 +0100 Subject: [PATCH] bpo-42208: Add _locale._get_locale_encoding() (GH-23052) * Add a new _locale._get_locale_encoding() function to get the current locale encoding. * Modify locale.getpreferredencoding() to use it. * Remove the _bootlocale module. --- Lib/_bootlocale.py | 46 ----------------- Lib/locale.py | 84 +++++++++++++++----------------- Lib/test/test_mimetypes.py | 18 +++---- Modules/_localemodule.c | 20 +++++++- Modules/clinic/_localemodule.c.h | 20 +++++++- PCbuild/lib.pyproj | 1 - 6 files changed, 86 insertions(+), 103 deletions(-) delete mode 100644 Lib/_bootlocale.py diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py deleted file mode 100644 index 3273a3b4225..00000000000 --- a/Lib/_bootlocale.py +++ /dev/null @@ -1,46 +0,0 @@ -"""A minimal subset of the locale module used at interpreter startup -(imported by the _io module), in order to reduce startup time. - -Don't import directly from third-party code; use the `locale` module instead! -""" - -import sys -import _locale - -if sys.platform.startswith("win"): - def getpreferredencoding(do_setlocale=True): - if sys.flags.utf8_mode: - return 'UTF-8' - return _locale._getdefaultlocale()[1] -else: - try: - _locale.CODESET - except AttributeError: - if hasattr(sys, 'getandroidapilevel'): - # On Android langinfo.h and CODESET are missing, and UTF-8 is - # always used in mbstowcs() and wcstombs(). - def getpreferredencoding(do_setlocale=True): - return 'UTF-8' - else: - def getpreferredencoding(do_setlocale=True): - if sys.flags.utf8_mode: - return 'UTF-8' - # This path for legacy systems needs the more complex - # getdefaultlocale() function, import the full locale module. - import locale - return locale.getpreferredencoding(do_setlocale) - else: - def getpreferredencoding(do_setlocale=True): - assert not do_setlocale - if sys.flags.utf8_mode: - return 'UTF-8' - result = _locale.nl_langinfo(_locale.CODESET) - if not result and sys.platform == 'darwin': - # nl_langinfo can return an empty string - # when the setting has an invalid value. - # Default to UTF-8 in that case because - # UTF-8 is the default charset on OSX and - # returning nothing will crash the - # interpreter. - result = 'UTF-8' - return result diff --git a/Lib/locale.py b/Lib/locale.py index 1a4e9f694f3..ee841e8b865 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL): """ _setlocale(category, _build_localename(getdefaultlocale())) -if sys.platform.startswith("win"): - # On Win32, this will return the ANSI code page - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using.""" - if sys.flags.utf8_mode: - return 'UTF-8' - import _bootlocale - return _bootlocale.getpreferredencoding(False) -else: - # On Unix, if CODESET is available, use that. - try: - CODESET - except NameError: + +try: + from _locale import _get_locale_encoding +except ImportError: + def _get_locale_encoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). - def getpreferredencoding(do_setlocale = True): - return 'UTF-8' - else: - # Fall back to parsing environment variables :-( - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - by looking at environment variables.""" - if sys.flags.utf8_mode: - return 'UTF-8' - res = getdefaultlocale()[1] - if res is None: - # LANG not set, default conservatively to ASCII - res = 'ascii' - return res - else: - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - according to the system configuration.""" - if sys.flags.utf8_mode: - return 'UTF-8' - import _bootlocale - if do_setlocale: - oldloc = setlocale(LC_CTYPE) - try: - setlocale(LC_CTYPE, "") - except Error: - pass - result = _bootlocale.getpreferredencoding(False) - if do_setlocale: - setlocale(LC_CTYPE, oldloc) - return result + return 'UTF-8' + if sys.flags.utf8_mode: + return 'UTF-8' + encoding = getdefaultlocale()[1] + if encoding is None: + # LANG not set, default conservatively to ASCII + encoding = 'ascii' + return encoding + +try: + CODESET +except NameError: + def getpreferredencoding(do_setlocale=True): + """Return the charset that the user is likely using.""" + return _get_locale_encoding() +else: + # On Unix, if CODESET is available, use that. + def getpreferredencoding(do_setlocale=True): + """Return the charset that the user is likely using, + according to the system configuration.""" + if sys.flags.utf8_mode: + return 'UTF-8' + + if not do_setlocale: + return _get_locale_encoding() + + old_loc = setlocale(LC_CTYPE) + try: + try: + setlocale(LC_CTYPE, "") + except Error: + pass + return _get_locale_encoding() + finally: + setlocale(LC_CTYPE, old_loc) ### Database diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index ddeae38e137..d63f6b66e10 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -3,7 +3,7 @@ import mimetypes import pathlib import sys -import unittest +import unittest.mock from test import support from test.support import os_helper @@ -71,14 +71,14 @@ def test_read_mime_types(self): # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding. # Not with locale encoding. _bootlocale has been imported because io.open(...) # uses it. - with os_helper.temp_dir() as directory: - data = "application/no-mans-land Fran\u00E7ais" - file = pathlib.Path(directory, "sample.mimetype") - file.write_text(data, encoding='utf-8') - import _bootlocale - with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): - mime_dict = mimetypes.read_mime_types(file) - eq(mime_dict[".Français"], "application/no-mans-land") + data = "application/no-mans-land Fran\u00E7ais" + filename = "filename" + fp = io.StringIO(data) + with unittest.mock.patch.object(mimetypes, 'open', + return_value=fp) as mock_open: + mime_dict = mimetypes.read_mime_types(filename) + mock_open.assert_called_with(filename, encoding='utf-8') + eq(mime_dict[".Français"], "application/no-mans-land") def test_non_standard_types(self): eq = self.assertEqual diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 9c7ce876e40..359deb75440 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain, } Py_RETURN_NONE; } -#endif +#endif // HAVE_BIND_TEXTDOMAIN_CODESET + +#endif // HAVE_LIBINTL_H + + +/*[clinic input] +_locale._get_locale_encoding + +Get the current locale encoding. +[clinic start generated code]*/ + +static PyObject * +_locale__get_locale_encoding_impl(PyObject *module) +/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/ +{ + return _Py_GetLocaleEncoding(); +} -#endif static struct PyMethodDef PyLocale_Methods[] = { _LOCALE_SETLOCALE_METHODDEF @@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = { _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif #endif + _LOCALE__GET_LOCALE_ENCODING_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h index 5d1db3ece79..703d034c32e 100644 --- a/Modules/clinic/_localemodule.c.h +++ b/Modules/clinic/_localemodule.c.h @@ -545,6 +545,24 @@ exit: #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */ +PyDoc_STRVAR(_locale__get_locale_encoding__doc__, +"_get_locale_encoding($module, /)\n" +"--\n" +"\n" +"Get the current locale encoding."); + +#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \ + {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__}, + +static PyObject * +_locale__get_locale_encoding_impl(PyObject *module); + +static PyObject * +_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _locale__get_locale_encoding_impl(module); +} + #ifndef _LOCALE_STRCOLL_METHODDEF #define _LOCALE_STRCOLL_METHODDEF #endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */ @@ -584,4 +602,4 @@ exit: #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ -/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/ diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj index f0c51edb9d1..a15165d92ef 100644 --- a/PCbuild/lib.pyproj +++ b/PCbuild/lib.pyproj @@ -1572,7 +1572,6 @@ -