From 6df9a82bd3bdb69e1e7febc3997ca4c7c1d629fb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 3 Jan 2008 19:08:15 +0000 Subject: [PATCH] Issue #1700, reported by Nguyen Quan Son, fix by Fredrik Lundh: Regular Expression inline flags not handled correctly for some unicode characters. --- Lib/sre_compile.py | 2 +- Lib/test/test_re.py | 30 ++++++++++++++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index 71095991ec..22ab2fd148 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -525,7 +525,7 @@ def compile(p, flags=0): indexgroup[i] = k return _sre.compile( - pattern, flags, code, + pattern, flags | p.pattern.flags, code, p.pattern.groups-1, groupindex, indexgroup ) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 3f81993d14..7d8eab87ff 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -637,6 +637,36 @@ class ReTests(unittest.TestCase): self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ()) + def test_inline_flags(self): + # Bug #1700 + upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow + lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow + + p = re.compile(upper_char, re.I | re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile(lower_char, re.I | re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + upper_char, re.U) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?i)' + lower_char, re.U) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + upper_char) + q = p.match(lower_char) + self.assertNotEqual(q, None) + + p = re.compile('(?iu)' + lower_char) + q = p.match(upper_char) + self.assertNotEqual(q, None) + + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: diff --git 
a/Misc/NEWS b/Misc/NEWS index 38a047d38a..97027a3cbb 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -53,6 +53,9 @@ Core and builtins Library ------- +- Issue #1700: Regular expression inline flags incorrectly handle certain + unicode characters. + - Change ctypes version number to 1.0.3 (when Python 2.5.2 is released, ctypes 1.0.3 will be also be released). -- 2.40.0