From: Serhiy Storchaka Date: Tue, 16 May 2017 15:16:45 +0000 (+0300) Subject: [3.5] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (... X-Git-Tag: v3.5.4rc1~150 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=24b5ed230df65f6a1f9d8dd0c4409377576113d9;p=python [3.5] bpo-30375: Correct the stacklevel of regex compiling warnings. (GH-1595) (#1605) Warnings emitted when compiling a regular expression now always point to the line in the user code. Previously they could point into the internals of the re module if emitted from inside of groups or conditionals. (cherry picked from commit c7ac7280c321b3c1679fe5f657a6be0f86adf173) --- diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 4ff50d1006..df947842e8 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -309,7 +309,7 @@ def isname(name): return False return True -def _class_escape(source, escape): +def _class_escape(source, escape, nested): # handle escape code inside character class code = ESCAPES.get(escape) if code: @@ -353,13 +353,13 @@ def _class_escape(source, escape): if c in ASCIILETTERS: import warnings warnings.warn('bad escape %s' % escape, - DeprecationWarning, stacklevel=8) + DeprecationWarning, stacklevel=nested + 6) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) -def _escape(source, escape, state): +def _escape(source, escape, state, nested): # handle escape code in expression code = CATEGORIES.get(escape) if code: @@ -420,13 +420,13 @@ def _escape(source, escape, state): if c in ASCIILETTERS: import warnings warnings.warn('bad escape %s' % escape, - DeprecationWarning, stacklevel=8) + DeprecationWarning, stacklevel=nested + 6) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) -def _parse_sub(source, state, nested=True): +def _parse_sub(source, state, nested): # parse an alternation: a|b|c items = [] @@ -434,7 +434,7 @@ def 
_parse_sub(source, state, nested=True): sourcematch = source.match start = source.tell() while True: - itemsappend(_parse(source, state)) + itemsappend(_parse(source, state, nested + 1)) if not sourcematch("|"): break @@ -476,10 +476,10 @@ def _parse_sub(source, state, nested=True): subpattern.append((BRANCH, (None, items))) return subpattern -def _parse_sub_cond(source, state, condgroup): - item_yes = _parse(source, state) +def _parse_sub_cond(source, state, condgroup, nested): + item_yes = _parse(source, state, nested + 1) if source.match("|"): - item_no = _parse(source, state) + item_no = _parse(source, state, nested + 1) if source.next == "|": raise source.error("conditional backref with more than two branches") else: @@ -488,7 +488,7 @@ def _parse_sub_cond(source, state, condgroup): subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern -def _parse(source, state): +def _parse(source, state, nested): # parse a simple pattern subpattern = SubPattern(state) @@ -521,7 +521,7 @@ def _parse(source, state): continue if this[0] == "\\": - code = _escape(source, this, state) + code = _escape(source, this, state, nested + 1) subpatternappend(code) elif this not in SPECIAL_CHARS: @@ -546,7 +546,7 @@ def _parse(source, state): if this == "]" and set != start: break elif this[0] == "\\": - code1 = _class_escape(source, this) + code1 = _class_escape(source, this, nested + 1) else: code1 = LITERAL, _ord(this) if sourcematch("-"): @@ -562,7 +562,7 @@ def _parse(source, state): setappend((LITERAL, _ord("-"))) break if that[0] == "\\": - code2 = _class_escape(source, that) + code2 = _class_escape(source, that, nested + 1) else: code2 = LITERAL, _ord(that) if code1[0] != LITERAL or code2[0] != LITERAL: @@ -713,7 +713,7 @@ def _parse(source, state): lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups - p = _parse_sub(source, state) + p = _parse_sub(source, state, nested + 1) if dir < 0: if 
lookbehindgroups is None: state.lookbehindgroups = None @@ -773,9 +773,9 @@ def _parse(source, state): except error as err: raise source.error(err.msg, len(name) + 1) from None if condgroup: - p = _parse_sub_cond(source, state, condgroup) + p = _parse_sub_cond(source, state, condgroup, nested + 1) else: - p = _parse_sub(source, state) + p = _parse_sub(source, state, nested + 1) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 9acd5abbfd..839bf275af 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -638,14 +638,18 @@ class ReTests(unittest.TestCase): re.purge() # for warnings for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': with self.subTest(c): - with self.assertWarns(DeprecationWarning): + with self.assertWarns(DeprecationWarning) as warns: self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c) self.assertIsNone(re.match('\\%c' % c, 'a')) + self.assertRegex(str(warns.warnings[0].message), 'bad escape') + self.assertEqual(warns.warnings[0].filename, __file__) for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': with self.subTest(c): - with self.assertWarns(DeprecationWarning): + with self.assertWarns(DeprecationWarning) as warns: self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c) self.assertIsNone(re.match('[\\%c]' % c, 'a')) + self.assertRegex(str(warns.warnings[0].message), 'bad escape') + self.assertEqual(warns.warnings[0].filename, __file__) def test_string_boundaries(self): # See http://bugs.python.org/issue10713 diff --git a/Misc/NEWS b/Misc/NEWS index 41fe80f44c..6b4b401aa6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -49,6 +49,10 @@ Extension Modules Library ------- +- bpo-30375: Warnings emitted when compile a regular expression now always + point to the line in the user code. Previously they could point into inners + of the re module if emitted from inside of groups or conditionals. 
+ - bpo-30048: Fixed ``Task.cancel()`` can be ignored when the task is running coroutine and the coroutine returned without any more ``await``.