]> granicus.if.org Git - python/commitdiff
[3.5] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (...
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 16 May 2017 15:16:45 +0000 (18:16 +0300)
committer GitHub <noreply@github.com>
Tue, 16 May 2017 15:16:45 +0000 (18:16 +0300)
Warnings emitted when compiling a regular expression now always point
to the line in the user code.  Previously they could point into the
internals of the re module if emitted from inside groups or conditionals.
(cherry picked from commit c7ac7280c321b3c1679fe5f657a6be0f86adf173)

Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index 4ff50d1006a607356349b75f738230cdef58415e..df947842e847dc90262ad4ca226cdd5640774faf 100644 (file)
@@ -309,7 +309,7 @@ def isname(name):
             return False
     return True
 
-def _class_escape(source, escape):
+def _class_escape(source, escape, nested):
     # handle escape code inside character class
     code = ESCAPES.get(escape)
     if code:
@@ -353,13 +353,13 @@ def _class_escape(source, escape):
             if c in ASCIILETTERS:
                 import warnings
                 warnings.warn('bad escape %s' % escape,
-                              DeprecationWarning, stacklevel=8)
+                              DeprecationWarning, stacklevel=nested + 6)
             return LITERAL, ord(escape[1])
     except ValueError:
         pass
     raise source.error("bad escape %s" % escape, len(escape))
 
-def _escape(source, escape, state):
+def _escape(source, escape, state, nested):
     # handle escape code in expression
     code = CATEGORIES.get(escape)
     if code:
@@ -420,13 +420,13 @@ def _escape(source, escape, state):
             if c in ASCIILETTERS:
                 import warnings
                 warnings.warn('bad escape %s' % escape,
-                              DeprecationWarning, stacklevel=8)
+                              DeprecationWarning, stacklevel=nested + 6)
             return LITERAL, ord(escape[1])
     except ValueError:
         pass
     raise source.error("bad escape %s" % escape, len(escape))
 
-def _parse_sub(source, state, nested=True):
+def _parse_sub(source, state, nested):
     # parse an alternation: a|b|c
 
     items = []
@@ -434,7 +434,7 @@ def _parse_sub(source, state, nested=True):
     sourcematch = source.match
     start = source.tell()
     while True:
-        itemsappend(_parse(source, state))
+        itemsappend(_parse(source, state, nested + 1))
         if not sourcematch("|"):
             break
 
@@ -476,10 +476,10 @@ def _parse_sub(source, state, nested=True):
     subpattern.append((BRANCH, (None, items)))
     return subpattern
 
-def _parse_sub_cond(source, state, condgroup):
-    item_yes = _parse(source, state)
+def _parse_sub_cond(source, state, condgroup, nested):
+    item_yes = _parse(source, state, nested + 1)
     if source.match("|"):
-        item_no = _parse(source, state)
+        item_no = _parse(source, state, nested + 1)
         if source.next == "|":
             raise source.error("conditional backref with more than two branches")
     else:
@@ -488,7 +488,7 @@ def _parse_sub_cond(source, state, condgroup):
     subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
     return subpattern
 
-def _parse(source, state):
+def _parse(source, state, nested):
     # parse a simple pattern
     subpattern = SubPattern(state)
 
@@ -521,7 +521,7 @@ def _parse(source, state):
                 continue
 
         if this[0] == "\\":
-            code = _escape(source, this, state)
+            code = _escape(source, this, state, nested + 1)
             subpatternappend(code)
 
         elif this not in SPECIAL_CHARS:
@@ -546,7 +546,7 @@ def _parse(source, state):
                 if this == "]" and set != start:
                     break
                 elif this[0] == "\\":
-                    code1 = _class_escape(source, this)
+                    code1 = _class_escape(source, this, nested + 1)
                 else:
                     code1 = LITERAL, _ord(this)
                 if sourcematch("-"):
@@ -562,7 +562,7 @@ def _parse(source, state):
                         setappend((LITERAL, _ord("-")))
                         break
                     if that[0] == "\\":
-                        code2 = _class_escape(source, that)
+                        code2 = _class_escape(source, that, nested + 1)
                     else:
                         code2 = LITERAL, _ord(that)
                     if code1[0] != LITERAL or code2[0] != LITERAL:
@@ -713,7 +713,7 @@ def _parse(source, state):
                         lookbehindgroups = state.lookbehindgroups
                         if lookbehindgroups is None:
                             state.lookbehindgroups = state.groups
-                    p = _parse_sub(source, state)
+                    p = _parse_sub(source, state, nested + 1)
                     if dir < 0:
                         if lookbehindgroups is None:
                             state.lookbehindgroups = None
@@ -773,9 +773,9 @@ def _parse(source, state):
                 except error as err:
                     raise source.error(err.msg, len(name) + 1) from None
             if condgroup:
-                p = _parse_sub_cond(source, state, condgroup)
+                p = _parse_sub_cond(source, state, condgroup, nested + 1)
             else:
-                p = _parse_sub(source, state)
+                p = _parse_sub(source, state, nested + 1)
             if not source.match(")"):
                 raise source.error("missing ), unterminated subpattern",
                                    source.tell() - start)
index 9acd5abbfd77763838315cdc2f536fd65b13b616..839bf275af35377b986bb2fc447fb13a3248d522 100644 (file)
@@ -638,14 +638,18 @@ class ReTests(unittest.TestCase):
         re.purge()  # for warnings
         for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
             with self.subTest(c):
-                with self.assertWarns(DeprecationWarning):
+                with self.assertWarns(DeprecationWarning) as warns:
                     self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
                     self.assertIsNone(re.match('\\%c' % c, 'a'))
+                self.assertRegex(str(warns.warnings[0].message), 'bad escape')
+                self.assertEqual(warns.warnings[0].filename, __file__)
         for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
             with self.subTest(c):
-                with self.assertWarns(DeprecationWarning):
+                with self.assertWarns(DeprecationWarning) as warns:
                     self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
                     self.assertIsNone(re.match('[\\%c]' % c, 'a'))
+                self.assertRegex(str(warns.warnings[0].message), 'bad escape')
+                self.assertEqual(warns.warnings[0].filename, __file__)
 
     def test_string_boundaries(self):
         # See http://bugs.python.org/issue10713
index 41fe80f44c3f123031a308ea35ba89d99eca2f92..6b4b401aa626158016a8e08e8b69d805c11e6f54 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,10 @@ Extension Modules
 Library
 -------
 
+- bpo-30375: Warnings emitted when compiling a regular expression now always
+  point to the line in the user code.  Previously they could point into the
+  internals of the re module if emitted from inside groups or conditionals.
+
 - bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is
   running coroutine and the coroutine returned without any more ``await``.