Add tornado.util.u and a fixer script to start using it.

2013-01-13 18:10:01 -05:00 · 2013-01-13 18:10:01 -05:00 · 8b40fafec7
parent 5cb63c9ca8
commit 8b40fafec7
3 changed files with 35 additions and 1 deletions
--- a/maint/scripts/custom_fixers/fix_unicode_literal.py
+++ b/maint/scripts/custom_fixers/fix_unicode_literal.py
@ -0,0 +1,17 @@
 import re
 from lib2to3.pgen2 import token
 from lib2to3 import fixer_base
 from lib2to3.fixer_util import Name, Call
 # Matches the start of a unicode string literal: a u/U prefix, an optional
 # r/R raw marker, then the opening quote character.  The pattern is written
 # as a plain raw string because the ur"" prefix is a syntax error on every
 # Python 3 release; on Python 2 a byte-string pattern still matches the
 # (ascii) beginning of unicode token values.
 _literal_re = re.compile(r"[uU][rR]?[\'\"]")
 class FixUnicodeLiteral(fixer_base.BaseFix):
    """lib2to3 fixer that rewrites ``u"..."`` literals as ``u("...")`` calls."""
    # Flag consumed by lib2to3 (see the BaseFix documentation).
    BM_compatible = True
    # Visit every STRING token; transform() filters on the literal's prefix.
    PATTERN = """STRING"""

    def transform(self, node, results):
        """Replace a unicode-prefixed string leaf with a call to ``u``.

        Nodes that are not STRING tokens, or whose text does not start with
        a u/U prefix, are left untouched.  The replacement is done in place
        through ``node.replace``; nothing is returned.
        """
        if node.type != token.STRING:
            return
        if not _literal_re.match(node.value):
            return
        # Work on a copy: strip the leading u/U and move the whitespace or
        # comment prefix from the literal onto the new function name.
        # NOTE(review): raw literals (ur"...") are wrapped as well, keeping
        # their r prefix -- confirm u() treats their backslashes as intended.
        bare_literal = node.clone()
        bare_literal.prefix = ''
        bare_literal.value = bare_literal.value[1:]
        func_name = Name(u'u', prefix=node.prefix)
        node.replace(Call(func_name, [bare_literal]))
--- a/tornado/test/util_test.py
+++ b/tornado/test/util_test.py
@ -1,7 +1,9 @@
 # coding: utf-8
 from __future__ import absolute_import, division, with_statement
 import sys
-from tornado.util import raise_exc_info, Configurable
+from tornado.escape import utf8
 from tornado.util import raise_exc_info, Configurable, u, b
 from tornado.test.util import unittest
@ -112,3 +114,8 @@ class ConfigurableTest(unittest.TestCase):
        # args bound in configure don't apply when using the subclass directly
        obj = TestConfig2()
        self.assertIs(obj.b, None)
 class UnicodeLiteralTest(unittest.TestCase):
    """Checks the fake unicode literal helper against a known utf8 encoding."""

    def test_unicode_escapes(self):
        # An escaped e-acute passed through u() must encode to the same two
        # utf8 bytes that b() produces from the latin1 literal.
        encoded = utf8(u('\u00e9'))
        expected = b('\xc3\xa9')
        self.assertEqual(encoded, expected)
--- a/tornado/util.py
+++ b/tornado/util.py
@ -68,13 +68,23 @@ def import_object(name):
 # to convert our string literals.  b() should only be applied to literal
 # latin1 strings.  Once we drop support for 2.5, we can remove this function
 # and just use byte literals.
 #
 # Fake unicode literal support:  Python 3.2 doesn't have the u'' marker for
 # literal strings, and alternative solutions like "from __future__ import
 # unicode_literals" have other problems (see PEP 414).  u() can be applied
 # to ascii strings that include \u escapes (but they must not contain
 # literal non-ascii characters).
 # Pick the implementations by probing the native string type rather than
 # naming ``unicode`` (undefined on Python 3 unless the source has been run
 # through 2to3) or ``bytes`` (undefined on Python 2.5).  Encoding a native
 # string returns a ``str`` on Python 2 and a distinct bytes object on
 # Python 3, and 2to3 leaves this expression untouched.
 if not isinstance(''.encode('latin1'), str):
    # Python 3: native strings are unicode.
    def b(s):
        """Convert a literal latin1 native string into a byte string."""
        return s.encode('latin1')
    def u(s):
        """Return the literal unchanged; it is already a unicode string."""
        return s
    bytes_type = bytes
 else:
    # Python 2: native strings are byte strings.
    def b(s):
        """Return the literal unchanged; it is already a byte string."""
        return s
    def u(s):
        """Decode an ascii literal into unicode, expanding its escapes."""
        return s.decode('unicode_escape')
    bytes_type = str