Merge pull request #1432 from szweep/unescape_fix

Modify xhtml_unescape to handle hex numeric refs
2015-04-28 21:12:09 -04:00 · 2015-04-28 21:12:09 -04:00 · bb77f2887f
parent 609dbb92e7 598ffdace4
commit bb77f2887f
2 changed files with 17 additions and 1 deletions
--- a/tornado/escape.py
+++ b/tornado/escape.py
@@ -378,7 +378,10 @@ def linkify(text, shorten=False, extra_params="",
 def _convert_entity(m):
    if m.group(1) == "#":
        try:
-            return unichr(int(m.group(2)))
+            if m.group(2)[:1].lower() == 'x':
+                return unichr(int(m.group(2)[1:], 16))
+            else:
+                return unichr(int(m.group(2)))
        except ValueError:
            return "&#%s;" % m.group(2)
    try:
--- a/tornado/test/escape_test.py
+++ b/tornado/test/escape_test.py
@@ -154,6 +154,19 @@ class EscapeTestCase(unittest.TestCase):
            self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
            self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))

+    def test_xhtml_unescape_numeric(self):
+        tests = [
+            ('foo&#32;bar', 'foo bar'),
+            ('foo&#x20;bar', 'foo bar'),
+            ('foo&#X20;bar', 'foo bar'),
+            ('foo&#xabc;bar', u('foo\u0abcbar')),
+            ('foo&#xyz;bar', 'foo&#xyz;bar'),  # invalid encoding
+            ('foo&#;bar', 'foo&#;bar'),        # invalid encoding
+            ('foo&#x;bar', 'foo&#x;bar'),      # invalid encoding
+        ]
+        for escaped, unescaped in tests:
+            self.assertEqual(unescaped, xhtml_unescape(escaped))
+
    def test_url_escape_unicode(self):
        tests = [
            # byte strings are passed through as-is