]> granicus.if.org Git - python/commitdiff
bpo-30375: Correct the stacklevel of regex compiling warnings. (#1595)
authorSerhiy Storchaka <storchaka@gmail.com>
Tue, 16 May 2017 12:16:15 +0000 (15:16 +0300)
committerGitHub <noreply@github.com>
Tue, 16 May 2017 12:16:15 +0000 (15:16 +0300)
Warnings emitted when compile a regular expression now always point
to the line in the user code.  Previously they could point into inners
of the re module if emitted from inside of groups or conditionals.

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index f72408f010b1fa7537d733598fc2c9ce9622ba35..d59d642e644b56ff1e6cd76bcd06a504dc6073be 100644 (file)
@@ -414,7 +414,7 @@ def _uniq(items):
             newitems.append(item)
     return newitems
 
-def _parse_sub(source, state, verbose, nested=True):
+def _parse_sub(source, state, verbose, nested):
     # parse an alternation: a|b|c
 
     items = []
@@ -422,7 +422,8 @@ def _parse_sub(source, state, verbose, nested=True):
     sourcematch = source.match
     start = source.tell()
     while True:
-        itemsappend(_parse(source, state, verbose, not nested and not items))
+        itemsappend(_parse(source, state, verbose, nested + 1,
+                           not nested and not items))
         if not sourcematch("|"):
             break
 
@@ -471,7 +472,7 @@ def _parse_sub(source, state, verbose, nested=True):
     subpattern.append((BRANCH, (None, items)))
     return subpattern
 
-def _parse(source, state, verbose, first=False):
+def _parse(source, state, verbose, nested, first=False):
     # parse a simple pattern
     subpattern = SubPattern(state)
 
@@ -708,7 +709,7 @@ def _parse(source, state, verbose, first=False):
                         lookbehindgroups = state.lookbehindgroups
                         if lookbehindgroups is None:
                             state.lookbehindgroups = state.groups
-                    p = _parse_sub(source, state, verbose)
+                    p = _parse_sub(source, state, verbose, nested + 1)
                     if dir < 0:
                         if lookbehindgroups is None:
                             state.lookbehindgroups = None
@@ -744,9 +745,9 @@ def _parse(source, state, verbose, first=False):
                             msg = "invalid group reference %d" % condgroup
                             raise source.error(msg, len(condname) + 1)
                     state.checklookbehindgroup(condgroup, source)
-                    item_yes = _parse(source, state, verbose)
+                    item_yes = _parse(source, state, verbose, nested + 1)
                     if source.match("|"):
-                        item_no = _parse(source, state, verbose)
+                        item_no = _parse(source, state, verbose, nested + 1)
                         if source.next == "|":
                             raise source.error("conditional backref with more than two branches")
                     else:
@@ -768,7 +769,7 @@ def _parse(source, state, verbose, first=False):
                                     source.string[:20],  # truncate long regexes
                                     ' (truncated)' if len(source.string) > 20 else '',
                                 ),
-                                DeprecationWarning, stacklevel=7
+                                DeprecationWarning, stacklevel=nested + 6
                             )
                         if (state.flags & SRE_FLAG_VERBOSE) and not verbose:
                             raise Verbose
@@ -788,7 +789,7 @@ def _parse(source, state, verbose, first=False):
                     raise source.error(err.msg, len(name) + 1) from None
             sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
                            not (del_flags & SRE_FLAG_VERBOSE))
-            p = _parse_sub(source, state, sub_verbose)
+            p = _parse_sub(source, state, sub_verbose, nested + 1)
             if not source.match(")"):
                 raise source.error("missing ), unterminated subpattern",
                                    source.tell() - start)
@@ -886,7 +887,7 @@ def parse(str, flags=0, pattern=None):
     pattern.str = str
 
     try:
-        p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, False)
+        p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
     except Verbose:
         # the VERBOSE flag was switched on inside the pattern.  to be
         # on the safe side, we'll parse the whole thing again...
@@ -894,7 +895,7 @@ def parse(str, flags=0, pattern=None):
         pattern.flags = flags | SRE_FLAG_VERBOSE
         pattern.str = str
         source.seek(0)
-        p = _parse_sub(source, pattern, True, False)
+        p = _parse_sub(source, pattern, True, 0)
 
     p.pattern.flags = fix_flags(str, p.pattern.flags)
 
index 1bb26540547150c1180807a6603e18dfd924463f..027df4092da8d8dd4e187afd6027c99766fc8886 100644 (file)
@@ -1370,6 +1370,7 @@ class ReTests(unittest.TestCase):
             str(warns.warnings[0].message),
             'Flags not at the start of the expression %s' % p
         )
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
         p = upper_char + '(?i)%s' % ('.?' * 100)
         with self.assertWarns(DeprecationWarning) as warns:
@@ -1378,6 +1379,7 @@ class ReTests(unittest.TestCase):
             str(warns.warnings[0].message),
             'Flags not at the start of the expression %s (truncated)' % p[:20]
         )
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
         with self.assertWarns(DeprecationWarning):
             self.assertTrue(re.match('(?s).(?i)' + upper_char, '\n' + lower_char))
@@ -1389,14 +1391,23 @@ class ReTests(unittest.TestCase):
             self.assertTrue(re.match('^(?i)' + upper_char, lower_char))
         with self.assertWarns(DeprecationWarning):
             self.assertTrue(re.match('$|(?i)' + upper_char, lower_char))
-        with self.assertWarns(DeprecationWarning):
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.match('(?:(?i)' + upper_char + ')', lower_char))
-        with self.assertWarns(DeprecationWarning):
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.fullmatch('(^)?(?(1)(?i)' + upper_char + ')',
                                          lower_char))
-        with self.assertWarns(DeprecationWarning):
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
+        with self.assertWarns(DeprecationWarning) as warns:
             self.assertTrue(re.fullmatch('($)?(?(1)|(?i)' + upper_char + ')',
                                          lower_char))
+        self.assertRegex(str(warns.warnings[0].message),
+                         'Flags not at the start')
+        self.assertEqual(warns.warnings[0].filename, __file__)
 
 
     def test_dollar_matches_twice(self):
index 39f583f4c4f25781be2fc9ce336829e093fb1283..53750735f3a1d973f17533d100396dcb02420542 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -323,6 +323,10 @@ Extension Modules
 Library
 -------
 
+- bpo-30375: Warnings emitted when compile a regular expression now always
+  point to the line in the user code.  Previously they could point into inners
+  of the re module if emitted from inside of groups or conditionals.
+
 - bpo-30329: imaplib and poplib now catch the Windows socket WSAEINVAL error
   (code 10022) on shutdown(SHUT_RDWR): An invalid operation was attempted.
   This error occurs sometimes on SSL connections.