Merge pull request #1432 from szweep/unescape_fix

Modify xhtml_unescape to handle hex numeric refs
This commit is contained in:
Ben Darnell 2015-04-28 21:12:09 -04:00
commit bb77f2887f
2 changed files with 17 additions and 1 deletions

View File

@@ -378,7 +378,10 @@ def linkify(text, shorten=False, extra_params="",
def _convert_entity(m):
if m.group(1) == "#":
try:
return unichr(int(m.group(2)))
if m.group(2)[:1].lower() == 'x':
return unichr(int(m.group(2)[1:], 16))
else:
return unichr(int(m.group(2)))
except ValueError:
return "&#%s;" % m.group(2)
try:

View File

@@ -154,6 +154,19 @@ class EscapeTestCase(unittest.TestCase):
self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_xhtml_unescape_numeric(self):
    """Check xhtml_unescape on decimal and hexadecimal numeric references.

    Each case is an ``(escaped, unescaped)`` pair.  Well-formed references
    must decode to the corresponding character; malformed references must
    come back unchanged.
    """
    tests = [
        # The inputs must contain the *escaped* form; a literal space or a
        # literal U+0ABC here would make the assertion trivially true.
        ('foo&#32;bar', 'foo bar'),    # decimal reference
        ('foo&#x20;bar', 'foo bar'),   # hex reference, lowercase "x"
        ('foo&#X20;bar', 'foo bar'),   # hex reference, uppercase "X"
        ('foo&#xabc;bar', u('foo\u0abcbar')),  # multi-digit hex reference
        ('foo&#xyz;bar', 'foo&#xyz;bar'),  # invalid encoding
        ('foo&#;bar', 'foo&#;bar'),        # invalid encoding
        ('foo&#x;bar', 'foo&#x;bar'),      # invalid encoding
    ]
    for escaped, unescaped in tests:
        self.assertEqual(unescaped, xhtml_unescape(escaped))
def test_url_escape_unicode(self):
tests = [
# byte strings are passed through as-is