Issue #27030: Unknown escapes consisting of ``'\'`` and ASCII letter in regular expressions now are errors.

author Serhiy Storchaka <storchaka@gmail.com>

Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)

committer Serhiy Storchaka <storchaka@gmail.com>

Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)
diff --git a/Doc/library/re.rst b/Doc/library/re.rst

index ceb795976d539e73d4462e0c7768b0023bccf635..52d2ec0ac6d82606349d3f8b1f1f2e1a424423b1 100644 (file)
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -317,8 +317,9 @@ The special characters are:
  
  
  The special sequences consist of ``'\'`` and a character from the list below.
-If the ordinary character is not on the list, then the resulting RE will match
-the second character.  For example, ``\$`` matches the character ``'$'``.
+If the ordinary character is not ASCII digit or ASCII letter, then the
+resulting RE will match the second character.  For example, ``\$`` matches the
+character ``'$'``.
  
  ``\number``
     Matches the contents of the group of the same number.  Groups are numbered
@@ -438,9 +439,8 @@ three digits in length.
  .. versionchanged:: 3.3
     The ``'\u'`` and ``'\U'`` escape sequences have been added.
  
-.. deprecated-removed:: 3.5 3.6
-   Unknown escapes consist of ``'\'`` and ASCII letter now raise a
-   deprecation warning and will be forbidden in Python 3.6.
+.. versionchanged:: 3.6
+   Unknown escapes consisting of ``'\'`` and ASCII letter now are errors.
  
  
  .. seealso::
@@ -528,11 +528,11 @@ form.
     current locale. The use of this flag is discouraged as the locale mechanism
     is very unreliable, and it only handles one "culture" at a time anyway;
     you should use Unicode matching instead, which is the default in Python 3
-   for Unicode (str) patterns. This flag makes sense only with bytes patterns.
+   for Unicode (str) patterns. This flag can be used only with bytes patterns.
  
-   .. deprecated-removed:: 3.5 3.6
-      Deprecated the use of  :const:`re.LOCALE` with string patterns or
-      :const:`re.ASCII`.
+   .. versionchanged:: 3.6
+      :const:`re.LOCALE` can be used only with bytes patterns and is
+      not compatible with :const:`re.ASCII`.
  
  
  .. data:: M
@@ -738,9 +738,8 @@ form.
     .. versionchanged:: 3.5
        Unmatched groups are replaced with an empty string.
  
-   .. deprecated-removed:: 3.5 3.6
-      Unknown escapes consist of ``'\'`` and ASCII letter now raise a
-      deprecation warning and will be forbidden in Python 3.6.
+   .. versionchanged:: 3.6
+      Unknown escapes consisting of ``'\'`` and ASCII letter now are errors.
  
  
  .. function:: subn(pattern, repl, string, count=0, flags=0)
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index 4ff50d1006a607356349b75f738230cdef58415e..521e379e72065931bb2184350450d9a21c765b16 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -282,33 +282,6 @@ class Tokenizer:
      def error(self, msg, offset=0):
          return error(msg, self.string, self.tell() - offset)
  
-# The following three functions are not used in this module anymore, but we keep
-# them here (with DeprecationWarnings) for backwards compatibility.
-
-def isident(char):
-    import warnings
-    warnings.warn('sre_parse.isident() will be removed in 3.5',
-                  DeprecationWarning, stacklevel=2)
-    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
-
-def isdigit(char):
-    import warnings
-    warnings.warn('sre_parse.isdigit() will be removed in 3.5',
-                  DeprecationWarning, stacklevel=2)
-    return "0" <= char <= "9"
-
-def isname(name):
-    import warnings
-    warnings.warn('sre_parse.isname() will be removed in 3.5',
-                  DeprecationWarning, stacklevel=2)
-    # check that group name is a valid string
-    if not isident(name[0]):
-        return False
-    for char in name[1:]:
-        if not isident(char) and not isdigit(char):
-            return False
-    return True
-
  def _class_escape(source, escape):
      # handle escape code inside character class
      code = ESCAPES.get(escape)
@@ -351,9 +324,7 @@ def _class_escape(source, escape):
              raise ValueError
          if len(escape) == 2:
              if c in ASCIILETTERS:
-                import warnings
-                warnings.warn('bad escape %s' % escape,
-                              DeprecationWarning, stacklevel=8)
+                raise source.error('bad escape %s' % escape, len(escape))
              return LITERAL, ord(escape[1])
      except ValueError:
          pass
@@ -418,9 +389,7 @@ def _escape(source, escape, state):
              raise source.error("invalid group reference", len(escape))
          if len(escape) == 2:
              if c in ASCIILETTERS:
-                import warnings
-                warnings.warn('bad escape %s' % escape,
-                              DeprecationWarning, stacklevel=8)
+                raise source.error("bad escape %s" % escape, len(escape))
              return LITERAL, ord(escape[1])
      except ValueError:
          pass
@@ -798,10 +767,7 @@ def fix_flags(src, flags):
      # Check and fix flags according to the type of pattern (str or bytes)
      if isinstance(src, str):
          if flags & SRE_FLAG_LOCALE:
-            import warnings
-            warnings.warn("LOCALE flag with a str pattern is deprecated. "
-                          "Will be an error in 3.6",
-                          DeprecationWarning, stacklevel=6)
+            raise ValueError("cannot use LOCALE flag with a str pattern")
          if not flags & SRE_FLAG_ASCII:
              flags |= SRE_FLAG_UNICODE
          elif flags & SRE_FLAG_UNICODE:
@@ -810,10 +776,7 @@ def fix_flags(src, flags):
          if flags & SRE_FLAG_UNICODE:
              raise ValueError("cannot use UNICODE flag with a bytes pattern")
          if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
-            import warnings
-            warnings.warn("ASCII and LOCALE flags are incompatible. "
-                          "Will be an error in 3.6",
-                          DeprecationWarning, stacklevel=6)
+            raise ValueError("ASCII and LOCALE flags are incompatible")
      return flags
  
  def parse(str, flags=0, pattern=None):
@@ -914,9 +877,7 @@ def parse_template(source, pattern):
                      this = chr(ESCAPES[this][1])
                  except KeyError:
                      if c in ASCIILETTERS:
-                        import warnings
-                        warnings.warn('bad escape %s' % this,
-                                      DeprecationWarning, stacklevel=4)
+                        raise s.error('bad escape %s' % this, len(this))
                  lappend(this)
          else:
              lappend(this)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py

index 7a741416b48c74dad593d699e6a65c2fbdd6e7e3..e27591c4fc07abcdb36bf7df4a402b12a2bf907b 100644 (file)
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -124,7 +124,7 @@ class ReTests(unittest.TestCase):
                           (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
          for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
              with self.subTest(c):
-                with self.assertWarns(DeprecationWarning):
+                with self.assertRaises(re.error):
                      self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
  
          self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
@@ -633,14 +633,10 @@ class ReTests(unittest.TestCase):
          re.purge()  # for warnings
          for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
              with self.subTest(c):
-                with self.assertWarns(DeprecationWarning):
-                    self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
-                    self.assertIsNone(re.match('\\%c' % c, 'a'))
+                self.assertRaises(re.error, re.compile, '\\%c' % c)
          for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
              with self.subTest(c):
-                with self.assertWarns(DeprecationWarning):
-                    self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
-                    self.assertIsNone(re.match('[\\%c]' % c, 'a'))
+                self.assertRaises(re.error, re.compile, '[\\%c]' % c)
  
      def test_string_boundaries(self):
          # See http://bugs.python.org/issue10713
@@ -993,10 +989,8 @@ class ReTests(unittest.TestCase):
              self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
              self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
              self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
-        with self.assertWarns(DeprecationWarning):
-            self.assertTrue(re.match(br"\u1234", b'u1234'))
-        with self.assertWarns(DeprecationWarning):
-            self.assertTrue(re.match(br"\U00012345", b'U00012345'))
+        self.assertRaises(re.error, re.compile, br"\u1234")
+        self.assertRaises(re.error, re.compile, br"\U00012345")
          self.assertTrue(re.match(br"\0", b"\000"))
          self.assertTrue(re.match(br"\08", b"\0008"))
          self.assertTrue(re.match(br"\01", b"\001"))
@@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase):
              self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
              self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
              self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
-        with self.assertWarns(DeprecationWarning):
-            self.assertTrue(re.match(br"[\u1234]", b'u'))
-        with self.assertWarns(DeprecationWarning):
-            self.assertTrue(re.match(br"[\U00012345]", b'U'))
+        self.assertRaises(re.error, re.compile, br"[\u1234]")
+        self.assertRaises(re.error, re.compile, br"[\U00012345]")
          self.checkPatternError(br"[\567]",
                                 r'octal escape value \567 outside of '
                                 r'range 0-0o377', 1)
@@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase):
          if bletter:
              self.assertIsNone(pat.match(bletter))
          # Incompatibilities
-        self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
-        self.assertWarns(DeprecationWarning, re.compile, '(?L)')
-        self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
-        self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
-        self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
-        self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+        self.assertRaises(ValueError, re.compile, '', re.LOCALE)
+        self.assertRaises(ValueError, re.compile, '(?L)')
+        self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
+        self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
+        self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
+        self.assertRaises(ValueError, re.compile, b'(?aL)')
  
      def test_bug_6509(self):
          # Replacement strings of both types must parse properly.
@@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase):
          # Test behaviour when not given a string or pattern as parameter
          self.assertRaises(TypeError, re.compile, 0)
  
-    def test_bug_13899(self):
-        # Issue #13899: re pattern r"[\A]" should work like "A" but matches
-        # nothing. Ditto B and Z.
-        with self.assertWarns(DeprecationWarning):
-            self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
-                             ['A', 'B', '\b', 'C', 'Z'])
-
      @bigmemtest(size=_2G, memuse=1)
      def test_large_search(self, size):
          # Issue #10182: indices were 32-bit-truncated.
diff --git a/Misc/NEWS b/Misc/NEWS

index 9e95932dbf2ef821edbaa53c4716aa0e24887c6b..b5cf31489238181dcee22b6f433b53c2a2bf7988 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -38,6 +38,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #27030: Unknown escapes consisting of ``'\'`` and ASCII letter in
+  regular expressions now are errors.
+
  - Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519).
    Initial patch by Jelle Zijlstra.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sat, 11 Jun 2016 16:15:00 +0000 (19:15 +0300)
Doc/library/re.rst		patch \| blob \| history
Lib/sre_parse.py		patch \| blob \| history
Lib/test/test_re.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history