i = 0
groups = []
groupsappend = groups.append
literals = [None] * len(p)
+ if isinstance(source, str):
+     encode = lambda x: x
+ else:
+     # The tokenizer implicitly decodes bytes objects as latin-1, so we
+     # must re-encode the final representation.
+     encode = lambda x: x.encode('latin1')
for c, s in p:
    if c is MARK:
        groupsappend((i, s))
        # literals[i] is already None
    else:
-         literals[i] = s
+         literals[i] = encode(s)
    i = i + 1
return groups, literals
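
The patch leans on a property of the latin-1 codec: it maps each of the 256
byte values to the Unicode code point with the same ordinal, so a
decode/encode round-trip is lossless for arbitrary bytes. A minimal
standalone sketch of that invariant (illustration only, not part of the
patch):

    # Every byte value survives a latin-1 decode/encode round-trip, so the
    # tokenizer can parse bytes as str and encode('latin1') recovers the
    # original bytes exactly.
    raw = bytes(range(256))
    assert raw.decode('latin1').encode('latin1') == raw
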
    self.assertRaises(ValueError, re.compile, r'(?a)\w', re.UNICODE)
    self.assertRaises(ValueError, re.compile, r'(?au)\w')

+ def test_bug_6509(self):
+     # Replacement strings of both types must parse properly.
+     # all strings
+     pat = re.compile(r'a(\w)')
+     self.assertEqual(pat.sub('b\\1', 'ac'), 'bc')
+     pat = re.compile('a(.)')
+     self.assertEqual(pat.sub('b\\1', 'a\u1234'), 'b\u1234')
+     pat = re.compile('..')
+     self.assertEqual(pat.sub(lambda m: 'str', 'a5'), 'str')
+
+     # all bytes
+     pat = re.compile(br'a(\w)')
+     self.assertEqual(pat.sub(b'b\\1', b'ac'), b'bc')
+     pat = re.compile(b'a(.)')
+     self.assertEqual(pat.sub(b'b\\1', b'a\xCD'), b'b\xCD')
+     pat = re.compile(b'..')
+     self.assertEqual(pat.sub(lambda m: b'bytes', b'a5'), b'bytes')
+
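
For context, this is the behaviour the new test locks in, shown here as a
standalone sketch against the public re API (illustration only, not part of
the patch): with the fix applied, an all-bytes substitution resolves group
references exactly as the all-str case does.

    import re

    # str pattern, replacement and subject: \1 expands to the captured char.
    assert re.sub(r'a(\w)', 'b\\1', 'ac') == 'bc'
    # bytes pattern, replacement and subject: formerly broken, now expands
    # the group reference the same way and returns bytes.
    assert re.sub(br'a(\w)', b'b\\1', b'ac') == b'bc'
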
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
Library
-------
+- Issue #6509: Fix re.sub to work properly when the pattern, the string, and
+  the replacement are all bytes. Patch by Antoine Pitrou.
+
- Issue #1054943: Fix unicodedata.normalize('NFC', text) for the Public Review
Issue #29