Add tornado.util.u and a fixer script to start using it.

This commit is contained in:
Ben Darnell 2013-01-13 18:10:01 -05:00
parent 5cb63c9ca8
commit 8b40fafec7
3 changed files with 35 additions and 1 deletions

View File

@ -0,0 +1,17 @@
import re
from lib2to3.pgen2 import token
from lib2to3 import fixer_base
from lib2to3.fixer_util import Name, Call
_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
class FixUnicodeLiteral(fixer_base.BaseFix):
    """lib2to3 fixer that rewrites ``u'...'`` string literals as ``u('...')``."""

    BM_compatible = True
    PATTERN = """STRING"""

    def transform(self, node, results):
        """Wrap a unicode-prefixed string leaf in a call to ``u``.

        Nodes that are not STRING tokens, or whose text does not start with
        a ``u``/``U`` prefix (optionally followed by ``r``/``R``) and a
        quote, are left untouched.  The rewrite is applied in place through
        ``node.replace``; nothing is returned.
        """
        if node.type != token.STRING:
            return
        if not _literal_re.match(node.value):
            return

        # Work on a copy of the literal with only its first character
        # (the ``u``/``U``) removed.
        # NOTE(review): for ``ur'...'`` literals this leaves an ``r'...'``
        # literal inside the call -- confirm that is the intended result.
        literal = node.clone()
        literal.value = literal.value[1:]
        # The original node's prefix is carried by the ``u`` name, so the
        # literal inside the call gets an empty one.
        literal.prefix = ''
        func_name = Name(u'u', prefix=node.prefix)
        node.replace(Call(func_name, [literal]))

View File

@ -1,7 +1,9 @@
# coding: utf-8
from __future__ import absolute_import, division, with_statement from __future__ import absolute_import, division, with_statement
import sys import sys
from tornado.util import raise_exc_info, Configurable from tornado.escape import utf8
from tornado.util import raise_exc_info, Configurable, u, b
from tornado.test.util import unittest from tornado.test.util import unittest
@ -112,3 +114,8 @@ class ConfigurableTest(unittest.TestCase):
# args bound in configure don't apply when using the subclass directly # args bound in configure don't apply when using the subclass directly
obj = TestConfig2() obj = TestConfig2()
self.assertIs(obj.b, None) self.assertIs(obj.b, None)
class UnicodeLiteralTest(unittest.TestCase):
    """Exercises the ``u()`` helper used in place of unicode literals."""

    def test_unicode_escapes(self):
        """An escaped code point passed through ``u()`` encodes to UTF-8."""
        encoded = utf8(u('\u00e9'))
        expected = b('\xc3\xa9')
        self.assertEqual(encoded, expected)

View File

@ -68,13 +68,23 @@ def import_object(name):
# to convert our string literals. b() should only be applied to literal
# latin1 strings. Once we drop support for 2.5, we can remove this function
# and just use byte literals.
#
# Fake unicode literal support: Python 3.2 doesn't have the u'' marker for
# literal strings, and alternative solutions like "from __future__ import
# unicode_literals" have other problems (see PEP 414). u() can be applied
# to ascii strings that include \u escapes (but they must not contain
# literal non-ascii characters).
if str is not unicode:
    # ``str`` and ``unicode`` are distinct types: b() is a pass-through
    # and u() decodes escape sequences into a unicode string.
    def b(s):
        """Return the literal ``s`` unchanged."""
        return s

    def u(s):
        """Return ``s`` decoded with the ``unicode_escape`` codec."""
        return s.decode('unicode_escape')

    bytes_type = str
else:
    # ``str`` is the unicode type: b() encodes to bytes and u() is a
    # pass-through.
    def b(s):
        """Return ``s`` encoded as latin1 bytes."""
        return s.encode('latin1')

    def u(s):
        """Return the literal ``s`` unchanged."""
        return s

    bytes_type = bytes