diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 4ec7b5865cc..e060471d4b0 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1549,6 +1549,11 @@ def test_charmap(self): self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"), b"foo\xa5bar") + def test_latin1(self): + # Issue6373 + self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"), + b"\xe4\xeb\xef\xf6\xfc") + def test_main(): support.run_unittest( diff --git a/Misc/NEWS b/Misc/NEWS index 2381844249c..98539e53173 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- Issue #6373: Fixed a RuntimeError when encoding a string which contains + unpaired surrogates with the latin-1 codec and the 'surrogateescape' error + handler. + - Issue #4856: Remove checks for win NT. Library diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0d4a3ddd806..305289bc78c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4201,10 +4201,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, repsize = PyBytes_Size(repunicode); if (repsize > 1) { /* Make room for all additional bytes. */ + respos = str - PyBytes_AS_STRING(res); if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; } + str = PyBytes_AS_STRING(res) + respos; ressize += repsize-1; } memcpy(str, PyBytes_AsString(repunicode), repsize);