From 4ff33af2574aa09bc8926c0deb116c886cb745d1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 5 Apr 2014 11:56:37 +0200 Subject: [PATCH] Issue #21118: Add unit test for invalid character replacement (code point higher than U+10ffff) --- Lib/test/test_unicode.py | 8 ++++++++ Objects/unicodeobject.c | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 58dfa20519b..7fda51c21d7 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -280,6 +280,14 @@ def test_maketrans_translate(self): self.assertEqual("[\xe9]".translate(str.maketrans({'\xe9': None})), "[]") + # invalid Unicode characters + invalid_char = 0x10ffff+1 + for before in "a\xe9\u20ac\U0010ffff": + mapping = str.maketrans({before: invalid_char}) + text = "[%s]" % before + self.assertRaises(ValueError, text.translate, mapping) + + # errors self.assertRaises(TypeError, self.type2test.maketrans) self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg') self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def') diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0386a871253..21837739bc3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8473,10 +8473,10 @@ charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result) } else if (PyLong_Check(x)) { long value = PyLong_AS_LONG(x); - long max = PyUnicode_GetMax(); - if (value < 0 || value > max) { - PyErr_Format(PyExc_TypeError, - "character mapping must be in range(0x%x)", max+1); + if (value < 0 || value > MAX_UNICODE) { + PyErr_Format(PyExc_ValueError, + "character mapping must be in range(0x%x)", + MAX_UNICODE+1); Py_DECREF(x); return -1; } @@ -8522,7 +8522,9 @@ charmaptranslate_output(Py_UCS4 ch, PyObject *mapping, } if (PyLong_Check(item)) { - Py_UCS4 ch = (Py_UCS4)PyLong_AS_LONG(item); + long ch = (Py_UCS4)PyLong_AS_LONG(item); + /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already + used it */ if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) { Py_DECREF(item); return -1; @@ -8570,11 +8572,9 @@ unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch, if (PyLong_Check(item)) { long replace = (Py_UCS4)PyLong_AS_LONG(item); - if (replace == -1) { - Py_DECREF(item); - return -1; - } - if (replace < 0 || 127 < replace) { + /* PyLong_AS_LONG() cannot fail, charmaptranslate_lookup() already + used it */ + if (127 < replace) { /* invalid character or character outside ASCII: skip the fast translate */ goto exit;