granicus.if.org Git - python/commitdiff
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 3 Oct 2013 09:08:22 +0000 (12:08 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Thu, 3 Oct 2013 09:08:22 +0000 (12:08 +0300)
Lib/lib2to3/fixes/fix_unicode.py
Lib/lib2to3/tests/test_fixers.py
Misc/NEWS

index 6c89576540c242935f91424752ec3cc618e09b5f..922486b5d8510b1274d6b54c413b6c0fbf109f4b 100644 (file)
@@ -1,25 +1,43 @@
-"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
+r"""Fixer for unicode.
+
+* Changes unicode to str and unichr to chr.
+
+* If "...\u..." is not unicode literal change it into "...\\u...".
+
+* Change u"..." into "...".
 
 """
 
-import re
 from ..pgen2 import token
 from .. import fixer_base
 
 _mapping = {u"unichr" : u"chr", u"unicode" : u"str"}
-_literal_re = re.compile(ur"[uU][rR]?[\'\"]")
 
 class FixUnicode(fixer_base.BaseFix):
     BM_compatible = True
     PATTERN = "STRING | 'unicode' | 'unichr'"
 
+    def start_tree(self, tree, filename):
+        super(FixUnicode, self).start_tree(tree, filename)
+        self.unicode_literals = 'unicode_literals' in tree.future_features
+
     def transform(self, node, results):
         if node.type == token.NAME:
             new = node.clone()
             new.value = _mapping[node.value]
             return new
         elif node.type == token.STRING:
-            if _literal_re.match(node.value):
-                new = node.clone()
-                new.value = new.value[1:]
-                return new
+            val = node.value
+            if (not self.unicode_literals and val[0] in u'rR\'"' and
+                u'\\' in val):
+                val = ur'\\'.join([
+                    v.replace(u'\\u', ur'\\u').replace(u'\\U', ur'\\U')
+                    for v in val.split(ur'\\')
+                ])
+            if val[0] in u'uU':
+                val = val[1:]
+            if val == node.value:
+                return node
+            new = node.clone()
+            new.value = val
+            return new
index 1817208c7c82585340442a45d506fa86e8688ffa..5f283a83598bda50afc15b30cabaeee379a284b4 100644 (file)
@@ -2824,6 +2824,43 @@ class Test_unicode(FixerTestCase):
         a = """R'''x''' """
         self.check(b, a)
 
+    def test_native_literal_escape_u(self):
+        b = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+        b = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """r'\\\\\\\\u20ac\\\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+    def test_bytes_literal_escape_u(self):
+        b = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """b'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+        b = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """br'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+    def test_unicode_literal_escape_u(self):
+        b = """u'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+        b = """ur'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+    def test_native_unicode_literal_escape_u(self):
+        f = 'from __future__ import unicode_literals\n'
+        b = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = f + """'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
+        b = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        a = f + """r'\\\\\\u20ac\\U0001d121\\\\u20ac'"""
+        self.check(b, a)
+
 class Test_callable(FixerTestCase):
     fixer = "callable"
 
index feb3f080b0908edc7fba1d01a7e1b85c32e6b834..9aad84fc177765cea0c821f0de14745dc78a6f74 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
+
 - Issue #19137: The pprint module now correctly formats empty set and frozenset
   and instances of set and frozenset subclasses.