Issue #16585: let CJK encoders accept error handlers that return bytes.

Summary of the hunks below:
  * Lib/test/test_multibytecodec.py: new test that encodes a string containing
    a lone surrogate with gb18030 + 'surrogateescape' and checks the raw
    byte comes through in the output.
  * Misc/NEWS: changelog entry for the fix.
  * Modules/cjkcodecs/multibytecodec.c (multibytecodec_encerror): the first
    item of the handler's result tuple may now be str or bytes.  A str
    replacement is still run through multibytecodec_encode(); a bytes
    replacement is used directly (with a new reference taken) as retstr.

diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py
index feb7bd595a2..7b47cb50e82 100644
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -45,6 +45,10 @@ def test_errorcallback_longindex(self):
         self.assertRaises(IndexError, dec,
                           b'apple\x92ham\x93spam', 'test.cjktest')
 
+    def test_errorhandler_returns_bytes(self):
+        enc = "\u30fb\udc80".encode('gb18030', 'surrogateescape')
+        self.assertEqual(enc, b'\x819\xa79\x80')
+
     def test_codingspec(self):
         try:
             for enc in ALL_CJKENCODINGS:
diff --git a/Misc/NEWS b/Misc/NEWS
index bdfe16158bc..6eff12c84ec 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #16585: Make CJK encoders support error handlers that return bytes per
+  PEP 383.
+
 - Issue #10182: The re module doesn't truncate indices to 32 bits anymore.
   Patch by Serhiy Storchaka.
 
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index 40717d8ba93..c032cdb0ed3 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -316,7 +316,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
         goto errorexit;
 
     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
-        !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
+        (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
         PyErr_SetString(PyExc_TypeError,
                         "encoding error handler must return "
@@ -324,7 +324,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
         goto errorexit;
     }
 
-    {
+    if (PyUnicode_Check(tobj)) {
         const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
 
         retstr = multibytecodec_encode(codec, state, &uraw,
@@ -333,6 +333,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
         if (retstr == NULL)
             goto errorexit;
     }
+    else {
+        Py_INCREF(tobj);
+        retstr = tobj;
+    }
 
     assert(PyBytes_Check(retstr));
     retstrsize = PyBytes_GET_SIZE(retstr);