granicus.if.org Git - python/commitdiff
Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh:
author Guido van Rossum <guido@python.org>
Thu, 3 Jan 2008 19:12:44 +0000 (19:12 +0000)
committer Guido van Rossum <guido@python.org>
Thu, 3 Jan 2008 19:12:44 +0000 (19:12 +0000)
Regular Expression inline flags not handled correctly for some unicode
characters.  (Forward port from 2.5.2.)

Lib/sre_compile.py
Lib/test/test_re.py
Misc/NEWS

index 71095991ecc96961880c768ecedb9dd3d716231f..22ab2fd14889314d7f733728d03850133c1e6b72 100644 (file)
@@ -525,7 +525,7 @@ def compile(p, flags=0):
         indexgroup[i] = k
 
     return _sre.compile(
-        pattern, flags, code,
+        pattern, flags | p.pattern.flags, code,
         p.pattern.groups-1,
         groupindex, indexgroup
         )
index f1fdfba6137323eb00bb373c672d9ebc4df1a557..3056ef35f386cf5dda8f8032d0981f4780d045e9 100644 (file)
@@ -642,6 +642,36 @@ class ReTests(unittest.TestCase):
             self.assertEqual(re.compile("bla").match(a), None)
             self.assertEqual(re.compile("").match(a).groups(), ())
 
+    def test_inline_flags(self):
+        # Bug #1700
+        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
+        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below
+
+        p = re.compile(upper_char, re.I | re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile(lower_char, re.I | re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?i)' + upper_char, re.U)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?i)' + lower_char, re.U)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?iu)' + upper_char)
+        q = p.match(lower_char)
+        self.assertNotEqual(q, None)
+
+        p = re.compile('(?iu)' + lower_char)
+        q = p.match(upper_char)
+        self.assertNotEqual(q, None)
+
+
 def run_re_tests():
     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
index 50fecb77dc4ac5ad97bdbbd66540984b7b16aa33..3e3f07446cc5c498fc62b6dd1847403c61240f31 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -348,6 +348,9 @@ Core and builtins
 Library
 -------
 
+- Issue #1700: Regular expression inline flags incorrectly handle certain
+  unicode characters.
+
 - Issue #1689: PEP 3141, numeric abstract base classes.
 
 - Tk issue #1851526: Return results from Python callbacks to Tcl as