Add tornado.util.u and a fixer script to start using it.
This commit is contained in:
parent
5cb63c9ca8
commit
8b40fafec7
|
@ -0,0 +1,17 @@
|
||||||
|
import re
|
||||||
|
from lib2to3.pgen2 import token
|
||||||
|
from lib2to3 import fixer_base
|
||||||
|
from lib2to3.fixer_util import Name, Call
|
||||||
|
|
||||||
|
# Matches the opening of a unicode string literal: a ``u``/``U`` prefix,
# an optional ``r``/``R`` raw marker, then the opening quote character.
# Written as a plain raw string (not ``ur"..."``) because the ``ur`` prefix
# is a syntax error on Python 3, which would make this module unimportable
# there; the compiled pattern is identical.
_literal_re = re.compile(r"[uU][rR]?[\'\"]")


class FixUnicodeLiteral(fixer_base.BaseFix):
    """lib2to3 fixer that rewrites ``u'...'`` literals as ``u('...')`` calls.

    Every STRING token is offered to `transform`; tokens that do not start
    with a unicode prefix are left untouched.
    """
    BM_compatible = True
    PATTERN = """STRING"""

    def transform(self, node, results):
        """Replace a unicode literal leaf with a call to ``u()``.

        ``node`` is the matched leaf and ``results`` is the (unused) match
        dictionary.  The tree is modified in place via ``node.replace`` and
        nothing is returned.
        """
        if node.type != token.STRING:
            return
        if not _literal_re.match(node.value):
            return
        literal = node.clone()
        # Drop only the leading ``u``/``U``; an ``r`` marker (if any) stays,
        # so ``ur'x'`` becomes ``u(r'x')``.
        # NOTE(review): confirm that u() treats such raw literals as intended.
        literal.value = literal.value[1:]
        # The leading whitespace/comments move onto the ``u`` name so the
        # rewritten call sits exactly where the literal used to be.
        literal.prefix = ''
        node.replace(Call(Name(u'u', prefix=node.prefix), [literal]))
|
|
@ -1,7 +1,9 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import absolute_import, division, with_statement
|
from __future__ import absolute_import, division, with_statement
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from tornado.util import raise_exc_info, Configurable
|
from tornado.escape import utf8
|
||||||
|
from tornado.util import raise_exc_info, Configurable, u, b
|
||||||
from tornado.test.util import unittest
|
from tornado.test.util import unittest
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,3 +114,8 @@ class ConfigurableTest(unittest.TestCase):
|
||||||
# args bound in configure don't apply when using the subclass directly
|
# args bound in configure don't apply when using the subclass directly
|
||||||
obj = TestConfig2()
|
obj = TestConfig2()
|
||||||
self.assertIs(obj.b, None)
|
self.assertIs(obj.b, None)
|
||||||
|
|
||||||
|
|
||||||
|
class UnicodeLiteralTest(unittest.TestCase):
    """Checks the ``u()`` fake-unicode-literal helper."""

    def test_unicode_escapes(self):
        # A \u escape passed through u() must come out as the single
        # character U+00E9, whose UTF-8 encoding is the bytes C3 A9.
        text = u('\u00e9')
        expected_bytes = b('\xc3\xa9')
        self.assertEqual(utf8(text), expected_bytes)
|
||||||
|
|
|
@ -68,13 +68,23 @@ def import_object(name):
|
||||||
# to convert our string literals. b() should only be applied to literal
|
# to convert our string literals. b() should only be applied to literal
|
||||||
# latin1 strings. Once we drop support for 2.5, we can remove this function
|
# latin1 strings. Once we drop support for 2.5, we can remove this function
|
||||||
# and just use byte literals.
|
# and just use byte literals.
|
||||||
|
#
|
||||||
|
# Fake unicode literal support: Python 3.2 doesn't have the u'' marker for
|
||||||
|
# literal strings, and alternative solutions like "from __future__ import
|
||||||
|
# unicode_literals" have other problems (see PEP 414). u() can be applied
|
||||||
|
# to ascii strings that include \u escapes (but they must not contain
|
||||||
|
# literal non-ascii characters).
|
||||||
# Look up the text type defensively: the ``unicode`` builtin does not exist
# on Python 3, so referencing it directly raises NameError unless the source
# has first been rewritten (e.g. by 2to3).  On Python 2 this is ``unicode``;
# otherwise it falls back to ``str``.
try:
    _unicode_type = unicode  # noqa: F821
except NameError:
    _unicode_type = str

if str is _unicode_type:
    # Native strings are unicode text.
    def b(s):
        """Convert a latin1-only text literal to a byte string."""
        return s.encode('latin1')

    def u(s):
        """Return ``s`` unchanged; native literals are already unicode."""
        return s
    bytes_type = bytes
else:
    # Native strings are byte strings.
    def b(s):
        """Return ``s`` unchanged; native literals are already bytes."""
        return s

    def u(s):
        """Decode an ascii literal, expanding escapes such as ``\\u00e9``."""
        return s.decode('unicode_escape')
    bytes_type = str
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue