From 484d9a409a94e719329b41edaed38c1b16b8de7d Mon Sep 17 00:00:00 2001 From: Neal Norwitz Date: Fri, 30 Sep 2005 04:46:49 +0000 Subject: [PATCH] Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, but Python incorrectly assumes it is in UTF-8 format Will backport. --- Lib/test/test_minidom.py | 9 +++++++++ Misc/ACKS | 1 + Misc/NEWS | 3 +++ Modules/pyexpat.c | 7 ++++++- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 3154fbf3e62..8b4c71523c1 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -889,6 +889,15 @@ def testEncodings(): and doc.toxml('utf-8') == '\xe2\x82\xac' and doc.toxml('iso-8859-15') == '\xa4', "testEncodings - encoding EURO SIGN") + + # Verify that character decoding errors throw exceptions instead of crashing + try: + doc = parseString('Comment \xe7a va ? Tr\xe8s bien ?') + except UnicodeDecodeError: + pass + else: + print 'parsing with bad encoding should raise a UnicodeDecodeError' + doc.unlink() class UserDataHandler: diff --git a/Misc/ACKS b/Misc/ACKS index c1cb855f239..34ebc5d2fd1 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -305,6 +305,7 @@ Flemming Kj Jiba Orjan Johansen Simon Johnston +Evan Jones Richard Jones Irmen de Jong Lucas de Jonge diff --git a/Misc/NEWS b/Misc/NEWS index 3c06f4ecf8d..f4f918c6f40 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -153,6 +153,9 @@ present). Extension Modules ----------------- +- Patch #1309009, Fix segfault in pyexpat when the XML document is in latin_1, + but Python incorrectly assumes it is in UTF-8 format + - Fix parse errors in the readline module when compiling without threads. - Patch #1288833: Removed thread lock from socket.getaddrinfo on diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index e6c14f8a2db..438f7609cb2 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -417,6 +417,9 @@ string_intern(xmlparseobject *self, const char* str) { PyObject *result = STRING_CONV_FUNC(str); PyObject *value; + /* result can be NULL if the unicode conversion failed. */ + if (!result) + return result; if (!self->intern) return result; value = PyDict_GetItem(self->intern, result); @@ -572,7 +575,9 @@ my_StartElementHandler(void *userData, Py_DECREF(v); } } - args = Py_BuildValue("(NN)", string_intern(self, name), container); + args = string_intern(self, name); + if (args != NULL) + args = Py_BuildValue("(NN)", args, container); if (args == NULL) { Py_DECREF(container); return;