From: Guido van Rossum Date: Thu, 3 Jan 2008 19:12:44 +0000 (+0000) Subject: Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh: X-Git-Tag: v2.6a1~778 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ae04c3356ed2aec0e9e2c39096a3ccd05722575a;p=python Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh: Regular Expression inline flags not handled correctly for some unicode characters. (Forward port from 2.5.2.) --- diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 71095991ec..22ab2fd148 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -525,7 +525,7 @@ def compile(p, flags=0): indexgroup[i] = k return _sre.compile( - pattern, flags, code, + pattern, flags | p.pattern.flags, code, p.pattern.groups-1, groupindex, indexgroup ) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f1fdfba613..3056ef35f3 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -642,6 +642,36 @@ class ReTests(unittest.TestCase): self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ()) + def test_inline_flags(self): + # Bug #1700 + upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below + lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below + + p = re.compile(upper_char, re.I | re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile(lower_char, re.I | re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + upper_char, re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + lower_char, re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + upper_char) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + lower_char) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: diff 
--git a/Misc/NEWS b/Misc/NEWS index 50fecb77dc..3e3f07446c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -348,6 +348,9 @@ Core and builtins Library ------- +- Issue #1700: Regular expression inline flags were not handled correctly for + certain Unicode characters. + - Issue #1689: PEP 3141, numeric abstract base classes. - Tk issue #1851526: Return results from Python callbacks to Tcl as