From fe08cdf2657bfde80aeed436fe49fee233651837 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:45:29 +0100 Subject: [PATCH] [3.13] gh-126742: Add _PyErr_SetLocaleString, use it for gdbm & dlerror messages (GH-126746) (GH-128023) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a helper to set an error from locale-encoded `char*` - Use the helper for gdbm & dlerror messages (cherry picked from commit 7303f06846b69016a075bca7ad7c6055f29ad024) Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner --- Include/internal/pycore_pyerrors.h | 12 ++++ Lib/test/test_ctypes/test_dlerror.py | 82 ++++++++++++++++++++++++---- Lib/test/test_dbm_gnu.py | 21 +++++-- Modules/_ctypes/_ctypes.c | 28 +++------- Modules/_ctypes/callproc.c | 38 ++++++------- Modules/_gdbmmodule.c | 44 +++++++++++---- Modules/_hashopenssl.c | 1 + Modules/_sqlite/util.c | 1 + Modules/_testcapi/exceptions.c | 1 + Modules/main.c | 1 + Modules/pyexpat.c | 7 ++- Python/errors.c | 9 +++ 12 files changed, 176 insertions(+), 69 deletions(-) diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index 15071638203..615cc23ec93 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -125,6 +125,18 @@ PyAPI_FUNC(void) _PyErr_SetString( PyObject *exception, const char *string); +/* + * Set an exception with the error message decoded from the current locale + * encoding (LC_CTYPE). + * + * Exceptions occurring in decoding take priority over the desired exception. + * + * Exported for '_ctypes' shared extensions. + */ +PyAPI_FUNC(void) _PyErr_SetLocaleString( + PyObject *exception, + const char *string); + PyAPI_FUNC(PyObject*) _PyErr_Format( PyThreadState *tstate, PyObject *exception, diff --git a/Lib/test/test_ctypes/test_dlerror.py b/Lib/test/test_ctypes/test_dlerror.py index 4441e30cd7a..c3c34d43481 100644 --- a/Lib/test/test_ctypes/test_dlerror.py +++ b/Lib/test/test_ctypes/test_dlerror.py @@ -1,7 +1,12 @@ +import _ctypes import os -import sys -import unittest import platform +import sys +import test.support +import unittest +from ctypes import CDLL, c_int +from ctypes.util import find_library + FOO_C = r""" #include @@ -26,7 +31,7 @@ @unittest.skipUnless(sys.platform.startswith('linux'), - 'Test only valid for Linux') + 'test requires GNU IFUNC support') class TestNullDlsym(unittest.TestCase): """GH-126554: Ensure that we catch NULL dlsym return values @@ -53,14 +58,6 @@ def test_null_dlsym(self): import subprocess import tempfile - # To avoid ImportErrors on Windows, where _ctypes does not have - # dlopen and dlsym, - # import here, i.e., inside the test function. - # The skipUnless('linux') decorator ensures that we're on linux - # if we're executing these statements. - from ctypes import CDLL, c_int - from _ctypes import dlopen, dlsym - retcode = subprocess.call(["gcc", "--version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) @@ -111,6 +108,8 @@ def test_null_dlsym(self): self.assertEqual(os.read(pipe_r, 2), b'OK') # Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c + dlopen = test.support.get_attribute(_ctypes, 'dlopen') + dlsym = test.support.get_attribute(_ctypes, 'dlsym') L = dlopen(dstname) with self.assertRaisesRegex(OSError, "symbol 'foo' not found"): dlsym(L, "foo") @@ -119,5 +118,66 @@ def test_null_dlsym(self): self.assertEqual(os.read(pipe_r, 2), b'OK') +@unittest.skipUnless(os.name != 'nt', 'test requires dlerror() calls') +class TestLocalization(unittest.TestCase): + + @staticmethod + def configure_locales(func): + return test.support.run_with_locale( + 'LC_ALL', + 'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk', + 'fr_FR.utf8', 'en_US.utf8', + '', + )(func) + + @classmethod + def setUpClass(cls): + cls.libc_filename = find_library("c") + + @configure_locales + def test_localized_error_from_dll(self): + dll = CDLL(self.libc_filename) + with self.assertRaises(AttributeError) as cm: + dll.this_name_does_not_exist + if sys.platform.startswith('linux'): + # On macOS, the filename is not reported by dlerror(). + self.assertIn(self.libc_filename, str(cm.exception)) + + @configure_locales + def test_localized_error_in_dll(self): + dll = CDLL(self.libc_filename) + with self.assertRaises(ValueError) as cm: + c_int.in_dll(dll, 'this_name_does_not_exist') + if sys.platform.startswith('linux'): + # On macOS, the filename is not reported by dlerror(). + self.assertIn(self.libc_filename, str(cm.exception)) + + @unittest.skipUnless(hasattr(_ctypes, 'dlopen'), + 'test requires _ctypes.dlopen()') + @configure_locales + def test_localized_error_dlopen(self): + missing_filename = b'missing\xff.so' + # Depending whether the locale, we may encode '\xff' differently + # but we are only interested in avoiding a UnicodeDecodeError + # when reporting the dlerror() error message which contains + # the localized filename. + filename_pattern = r'missing.*?\.so' + with self.assertRaisesRegex(OSError, filename_pattern): + _ctypes.dlopen(missing_filename, 2) + + @unittest.skipUnless(hasattr(_ctypes, 'dlopen'), + 'test requires _ctypes.dlopen()') + @unittest.skipUnless(hasattr(_ctypes, 'dlsym'), + 'test requires _ctypes.dlsym()') + @configure_locales + def test_localized_error_dlsym(self): + dll = _ctypes.dlopen(self.libc_filename) + with self.assertRaises(OSError) as cm: + _ctypes.dlsym(dll, 'this_name_does_not_exist') + if sys.platform.startswith('linux'): + # On macOS, the filename is not reported by dlerror(). + self.assertIn(self.libc_filename, str(cm.exception)) + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py index e20addf1f04..66268c42a30 100644 --- a/Lib/test/test_dbm_gnu.py +++ b/Lib/test/test_dbm_gnu.py @@ -1,10 +1,11 @@ -from test import support -from test.support import import_helper, cpython_only -gdbm = import_helper.import_module("dbm.gnu") #skip if not supported -import unittest import os -from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath +import unittest +from test import support +from test.support import cpython_only, import_helper +from test.support.os_helper import (TESTFN, TESTFN_NONASCII, FakePath, + create_empty_file, temp_dir, unlink) +gdbm = import_helper.import_module("dbm.gnu") # skip if not supported filename = TESTFN @@ -205,6 +206,16 @@ def test_clear(self): self.assertNotIn(k, db) self.assertEqual(len(db), 0) + @support.run_with_locale( + 'LC_ALL', + 'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk', + 'fr_FR.utf8', 'en_US.utf8', + '', + ) + def test_localized_error(self): + with temp_dir() as d: + create_empty_file(os.path.join(d, 'test')) + self.assertRaises(gdbm.error, gdbm.open, filename, 'r') if __name__ == '__main__': diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 1509cee6022..3d7cb1b1164 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -986,15 +986,8 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, PyObject *dll, #ifdef USE_DLERROR const char *dlerr = dlerror(); if (dlerr) { - PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); - if (message) { - PyErr_SetObject(PyExc_ValueError, message); - Py_DECREF(message); - return NULL; - } - // Ignore errors from PyUnicode_DecodeLocale, - // fall back to the generic error below. - PyErr_Clear(); + _PyErr_SetLocaleString(PyExc_ValueError, dlerr); + return NULL; } #endif #undef USE_DLERROR @@ -3809,21 +3802,14 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds) address = (PPROC)dlsym(handle, name); if (!address) { - #ifdef USE_DLERROR + #ifdef USE_DLERROR const char *dlerr = dlerror(); if (dlerr) { - PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); - if (message) { - PyErr_SetObject(PyExc_AttributeError, message); - Py_DECREF(ftuple); - Py_DECREF(message); - return NULL; - } - // Ignore errors from PyUnicode_DecodeLocale, - // fall back to the generic error below. - PyErr_Clear(); + _PyErr_SetLocaleString(PyExc_AttributeError, dlerr); + Py_DECREF(ftuple); + return NULL; } - #endif + #endif PyErr_Format(PyExc_AttributeError, "function '%s' not found", name); Py_DECREF(ftuple); return NULL; diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index b5511f06e39..f9864ebb735 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1579,10 +1579,11 @@ static PyObject *py_dl_open(PyObject *self, PyObject *args) Py_XDECREF(name2); if (!handle) { const char *errmsg = dlerror(); - if (!errmsg) - errmsg = "dlopen() error"; - PyErr_SetString(PyExc_OSError, - errmsg); + if (errmsg) { + _PyErr_SetLocaleString(PyExc_OSError, errmsg); + return NULL; + } + PyErr_SetString(PyExc_OSError, "dlopen() error"); return NULL; } return PyLong_FromVoidPtr(handle); @@ -1595,8 +1596,12 @@ static PyObject *py_dl_close(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "O&:dlclose", &_parse_voidp, &handle)) return NULL; if (dlclose(handle)) { - PyErr_SetString(PyExc_OSError, - dlerror()); + const char *errmsg = dlerror(); + if (errmsg) { + _PyErr_SetLocaleString(PyExc_OSError, errmsg); + return NULL; + } + PyErr_SetString(PyExc_OSError, "dlclose() error"); return NULL; } Py_RETURN_NONE; @@ -1630,21 +1635,14 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args) if (ptr) { return PyLong_FromVoidPtr(ptr); } - #ifdef USE_DLERROR - const char *dlerr = dlerror(); - if (dlerr) { - PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape"); - if (message) { - PyErr_SetObject(PyExc_OSError, message); - Py_DECREF(message); - return NULL; - } - // Ignore errors from PyUnicode_DecodeLocale, - // fall back to the generic error below. - PyErr_Clear(); + #ifdef USE_DLERROR + const char *errmsg = dlerror(); + if (errmsg) { + _PyErr_SetLocaleString(PyExc_OSError, errmsg); + return NULL; } - #endif - #undef USE_DLERROR + #endif + #undef USE_DLERROR PyErr_Format(PyExc_OSError, "symbol '%s' not found", name); return NULL; } diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c index df7fba67810..ea4fe247987 100644 --- a/Modules/_gdbmmodule.c +++ b/Modules/_gdbmmodule.c @@ -8,10 +8,11 @@ #endif #include "Python.h" +#include "pycore_pyerrors.h" // _PyErr_SetLocaleString() #include "gdbm.h" #include -#include // free() +#include // free() #include #include @@ -33,6 +34,24 @@ get_gdbm_state(PyObject *module) return (_gdbm_state *)state; } +/* + * Set the gdbm error obtained by gdbm_strerror(gdbm_errno). + * + * If no error message exists, a generic (UTF-8) error message + * is used instead. + */ +static void +set_gdbm_error(_gdbm_state *state, const char *generic_error) +{ + const char *gdbm_errmsg = gdbm_strerror(gdbm_errno); + if (gdbm_errmsg) { + _PyErr_SetLocaleString(state->gdbm_error, gdbm_errmsg); + } + else { + PyErr_SetString(state->gdbm_error, generic_error); + } +} + /*[clinic input] module _gdbm class _gdbm.gdbm "gdbmobject *" "&Gdbmtype" @@ -91,7 +110,7 @@ newgdbmobject(_gdbm_state *state, const char *file, int flags, int mode) PyErr_SetFromErrnoWithFilename(state->gdbm_error, file); } else { - PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno)); + set_gdbm_error(state, "gdbm_open() error"); } Py_DECREF(dp); return NULL; @@ -136,7 +155,7 @@ gdbm_length(gdbmobject *dp) PyErr_SetFromErrno(state->gdbm_error); } else { - PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno)); + set_gdbm_error(state, "gdbm_count() error"); } return -1; } @@ -286,7 +305,7 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w) PyErr_SetObject(PyExc_KeyError, v); } else { - PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno)); + set_gdbm_error(state, "gdbm_delete() error"); } return -1; } @@ -297,11 +316,12 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w) } errno = 0; if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) { - if (errno != 0) + if (errno != 0) { PyErr_SetFromErrno(state->gdbm_error); - else - PyErr_SetString(state->gdbm_error, - gdbm_strerror(gdbm_errno)); + } + else { + set_gdbm_error(state, "gdbm_store() error"); + } return -1; } } @@ -534,10 +554,12 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls) check_gdbmobject_open(self, state->gdbm_error); errno = 0; if (gdbm_reorganize(self->di_dbm) < 0) { - if (errno != 0) + if (errno != 0) { PyErr_SetFromErrno(state->gdbm_error); - else - PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno)); + } + else { + set_gdbm_error(state, "gdbm_reorganize() error"); + } return NULL; } Py_RETURN_NONE; diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 14d9c186151..d2e7630c22e 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -320,6 +320,7 @@ _setException(PyObject *exc, const char* altmsg, ...) va_end(vargs); ERR_clear_error(); + /* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */ lib = ERR_lib_error_string(errcode); func = ERR_func_error_string(errcode); reason = ERR_reason_error_string(errcode); diff --git a/Modules/_sqlite/util.c b/Modules/_sqlite/util.c index 9e8613ef679..b0622e66928 100644 --- a/Modules/_sqlite/util.c +++ b/Modules/_sqlite/util.c @@ -134,6 +134,7 @@ _pysqlite_seterror(pysqlite_state *state, sqlite3 *db) /* Create and set the exception. */ int extended_errcode = sqlite3_extended_errcode(db); + // sqlite3_errmsg() always returns an UTF-8 encoded message const char *errmsg = sqlite3_errmsg(db); raise_exception(exc_class, extended_errcode, errmsg); return extended_errcode; diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 42a9915143e..d73625c453f 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -3,6 +3,7 @@ #include "parts.h" #include "util.h" + #include "clinic/exceptions.c.h" diff --git a/Modules/main.c b/Modules/main.c index 15ea49a1bad..3bf2241f283 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -374,6 +374,7 @@ pymain_run_file_obj(PyObject *program_name, PyObject *filename, if (fp == NULL) { // Ignore the OSError PyErr_Clear(); + // TODO(picnixz): strerror() is locale dependent but not PySys_FormatStderr(). PySys_FormatStderr("%S: can't open file %R: [Errno %d] %s\n", program_name, filename, errno, strerror(errno)); return 2; diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index fafb5ce4106..e131389805b 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1782,7 +1782,12 @@ add_error(PyObject *errors_module, PyObject *codes_dict, * with the other uses of the XML_ErrorString function * elsewhere within this file. pyexpat's copy of the messages * only acts as a fallback in case of outdated runtime libexpat, - * where it returns NULL. */ + * where it returns NULL. + * + * In addition, XML_ErrorString is assumed to return UTF-8 encoded + * strings (in conv_string_to_unicode, we decode them using 'strict' + * error handling). + */ const char *error_string = XML_ErrorString(error_code); if (error_string == NULL) { error_string = error_info_of[error_index].description; diff --git a/Python/errors.c b/Python/errors.c index bf1ddfa8cfb..f19c030b1be 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -299,6 +299,15 @@ PyErr_SetString(PyObject *exception, const char *string) _PyErr_SetString(tstate, exception, string); } +void +_PyErr_SetLocaleString(PyObject *exception, const char *string) +{ + PyObject *value = PyUnicode_DecodeLocale(string, "surrogateescape"); + if (value != NULL) { + PyErr_SetObject(exception, value); + Py_DECREF(value); + } +} PyObject* _Py_HOT_FUNCTION PyErr_Occurred(void)