granicus.if.org Git - python/commitdiff
bpo-30298: Weaken the condition of deprecation warnings for inline modifiers. (#1490)
author: Serhiy Storchaka <storchaka@gmail.com>
Wed, 10 May 2017 03:05:20 +0000 (06:05 +0300)
committer: GitHub <noreply@github.com>
Wed, 10 May 2017 03:05:20 +0000 (06:05 +0300)
Several consecutive inline modifiers are now allowed at the start of the
pattern (e.g. '(?i)(?s)...').  In verbose mode, whitespace and comments
are now allowed before and between inline modifiers (e.g.
'(?x) (?i) (?s)...').

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index ab37fd3fe2f46f2257e80085914c2178f6bcab9b..d8d1bd552fbee0856ad825ae49997aa2f7673300 100644 (file)
@@ -412,7 +412,7 @@ def _parse_sub(source, state, verbose, nested=True):
     sourcematch = source.match
     start = source.tell()
     while True:
-        itemsappend(_parse(source, state, verbose))
+        itemsappend(_parse(source, state, verbose, not nested and not items))
         if not sourcematch("|"):
             break
 
@@ -466,7 +466,7 @@ def _parse_sub_cond(source, state, condgroup, verbose):
     subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
     return subpattern
 
-def _parse(source, state, verbose):
+def _parse(source, state, verbose, first=False):
     # parse a simple pattern
     subpattern = SubPattern(state)
 
@@ -730,10 +730,9 @@ def _parse(source, state, verbose):
                     state.checklookbehindgroup(condgroup, source)
                 elif char in FLAGS or char == "-":
                     # flags
-                    pos = source.pos
                     flags = _parse_flags(source, state, char)
                     if flags is None:  # global flags
-                        if pos != 3:  # "(?x"
+                        if not first or subpattern:
                             import warnings
                             warnings.warn(
                                 'Flags not at the start of the expression %s%s' % (
@@ -742,6 +741,8 @@ def _parse(source, state, verbose):
                                 ),
                                 DeprecationWarning, stacklevel=7
                             )
+                        if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
+                            raise Verbose
                         continue
                     add_flags, del_flags = flags
                     group = None
@@ -795,9 +796,6 @@ def _parse_flags(source, state, char):
                 msg = "unknown flag" if char.isalpha() else "missing -, : or )"
                 raise source.error(msg, len(char))
     if char == ")":
-        if ((add_flags & SRE_FLAG_VERBOSE) and
-            not (state.flags & SRE_FLAG_VERBOSE)):
-            raise Verbose
         state.flags |= add_flags
         return None
     if add_flags & GLOBAL_FLAGS:
index 3129f7e9888bc59db6f989380097ccc16a2f83cd..4d71eea517e3e42fe3f74e3c973b1bb04840ece6 100644 (file)
@@ -1325,32 +1325,43 @@ class ReTests(unittest.TestCase):
         upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
         lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
 
-        p = re.compile(upper_char, re.I | re.U)
-        q = p.match(lower_char)
+        p = re.compile('.' + upper_char, re.I | re.S)
+        q = p.match('\n' + lower_char)
         self.assertTrue(q)
 
-        p = re.compile(lower_char, re.I | re.U)
-        q = p.match(upper_char)
+        p = re.compile('.' + lower_char, re.I | re.S)
+        q = p.match('\n' + upper_char)
         self.assertTrue(q)
 
-        p = re.compile('(?i)' + upper_char, re.U)
-        q = p.match(lower_char)
+        p = re.compile('(?i).' + upper_char, re.S)
+        q = p.match('\n' + lower_char)
         self.assertTrue(q)
 
-        p = re.compile('(?i)' + lower_char, re.U)
-        q = p.match(upper_char)
+        p = re.compile('(?i).' + lower_char, re.S)
+        q = p.match('\n' + upper_char)
         self.assertTrue(q)
 
-        p = re.compile('(?iu)' + upper_char)
-        q = p.match(lower_char)
+        p = re.compile('(?is).' + upper_char)
+        q = p.match('\n' + lower_char)
         self.assertTrue(q)
 
-        p = re.compile('(?iu)' + lower_char)
-        q = p.match(upper_char)
+        p = re.compile('(?is).' + lower_char)
+        q = p.match('\n' + upper_char)
         self.assertTrue(q)
 
-        self.assertTrue(re.match('(?ixu) ' + upper_char, lower_char))
-        self.assertTrue(re.match('(?ixu) ' + lower_char, upper_char))
+        p = re.compile('(?s)(?i).' + upper_char)
+        q = p.match('\n' + lower_char)
+        self.assertTrue(q)
+
+        p = re.compile('(?s)(?i).' + lower_char)
+        q = p.match('\n' + upper_char)
+        self.assertTrue(q)
+
+        self.assertTrue(re.match('(?ix) ' + upper_char, lower_char))
+        self.assertTrue(re.match('(?ix) ' + lower_char, upper_char))
+        self.assertTrue(re.match(' (?i) ' + upper_char, lower_char, re.X))
+        self.assertTrue(re.match('(?x) (?i) ' + upper_char, lower_char))
+        self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char, re.X))
 
         p = upper_char + '(?i)'
         with self.assertWarns(DeprecationWarning) as warns:
@@ -1368,6 +1379,26 @@ class ReTests(unittest.TestCase):
             'Flags not at the start of the expression %s (truncated)' % p[:20]
         )
 
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match('(?i) ' + upper_char + ' (?x)', lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match(' (?x) (?i) ' + upper_char, lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
+                                         lower_char))
+        with self.assertWarns(DeprecationWarning):
+            self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
+                                         lower_char))
+
+
     def test_dollar_matches_twice(self):
         "$ matches the end of string, and just before the terminating \n"
         pattern = re.compile('$')
index 7a79521efd745225b319975e586ceb776f681380..997a03411de4532d80e6051aa6ae2313a5ddd098 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -320,6 +320,12 @@ Extension Modules
 Library
 -------
 
+- bpo-30298: Weaken the condition of deprecation warnings for inline modifiers.
+  Several consecutive inline modifiers are now allowed at the start of the
+  pattern (e.g. ``'(?i)(?s)...'``).  In verbose mode, whitespace and comments
+  are now allowed before and between inline modifiers (e.g.
+  ``'(?x) (?i) (?s)...'``).
+
 - bpo-30285: Optimized case-insensitive matching and searching of regular
   expressions.