]> granicus.if.org Git - python/commitdiff
Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
authorSerhiy Storchaka <storchaka@gmail.com>
Mon, 1 Dec 2014 09:50:07 +0000 (11:50 +0200)
committerSerhiy Storchaka <storchaka@gmail.com>
Mon, 1 Dec 2014 09:50:07 +0000 (11:50 +0200)
re.ASCII. It never worked.

Doc/library/re.rst
Lib/sre_parse.py
Lib/test/test_re.py
Misc/NEWS

index d1823aa8081301fcc87d3a879af6e1a83a443023..60ded8b2f4a30ff8babf96c6f728fc974de9dde3 100644 (file)
@@ -521,7 +521,11 @@ form.
    current locale. The use of this flag is discouraged as the locale mechanism
    is very unreliable, and it only handles one "culture" at a time anyway;
    you should use Unicode matching instead, which is the default in Python 3
-   for Unicode (str) patterns.
+   for Unicode (str) patterns. This flag makes sense only with bytes patterns.
+
+   .. deprecated-removed:: 3.5 3.6
+      Deprecated the use of :const:`re.LOCALE` with string patterns or
+      :const:`re.ASCII`.
 
 
 .. data:: M
index 8d9a09a860dde4e0e054777f7670cb905c1092b0..1a7d3162532eeab93adbee1fbfc724c95d9d973d 100644 (file)
@@ -751,6 +751,11 @@ def _parse(source, state):
 def fix_flags(src, flags):
     # Check and fix flags according to the type of pattern (str or bytes)
     if isinstance(src, str):
+        if flags & SRE_FLAG_LOCALE:
+            import warnings
+            warnings.warn("LOCALE flag with a str pattern is deprecated. "
+                          "Will be an error in 3.6",
+                          DeprecationWarning, stacklevel=6)
         if not flags & SRE_FLAG_ASCII:
             flags |= SRE_FLAG_UNICODE
         elif flags & SRE_FLAG_UNICODE:
@@ -758,6 +763,11 @@ def fix_flags(src, flags):
     else:
         if flags & SRE_FLAG_UNICODE:
             raise ValueError("can't use UNICODE flag with a bytes pattern")
+        if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
+            import warnings
+            warnings.warn("ASCII and LOCALE flags are incompatible. "
+                          "Will be an error in 3.6",
+                          DeprecationWarning, stacklevel=6)
     return flags
 
 def parse(str, flags=0, pattern=None):
index f8b33a48981f055bdcbf0c61087393825fd6a6cc..6e90b2fec9a7f30c33b622f4623c624bba4973f0 100644 (file)
@@ -502,10 +502,6 @@ class ReTests(unittest.TestCase):
                                    "abcd abc bcd bx", re.ASCII).group(1), "bx")
         self.assertEqual(re.search(r"\B(b.)\B",
                                    "abc bcd bc abxd", re.ASCII).group(1), "bx")
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
         self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -526,8 +522,6 @@ class ReTests(unittest.TestCase):
                                    b"1aa! a").group(0), b"1aa! a")
         self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                    "1aa! a", re.ASCII).group(0), "1aa! a")
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
         self.assertEqual(re.search(br"\d\D\w\W\s\S",
                                    b"1aa! a", re.LOCALE).group(0), b"1aa! a")
 
@@ -693,9 +687,12 @@ class ReTests(unittest.TestCase):
         self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
         self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
         self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+        self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
 
         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
         self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
+        self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
+        self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
 
     def test_not_literal(self):
         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
@@ -780,8 +777,10 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.X, re.VERBOSE)
 
     def test_flags(self):
-        for flag in [re.I, re.M, re.X, re.S, re.L]:
+        for flag in [re.I, re.M, re.X, re.S, re.A, re.U]:
             self.assertTrue(re.compile('^pattern$', flag))
+        for flag in [re.I, re.M, re.X, re.S, re.A, re.L]:
+            self.assertTrue(re.compile(b'^pattern$', flag))
 
     def test_sre_character_literals(self):
         for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
@@ -1146,6 +1145,52 @@ class ReTests(unittest.TestCase):
         self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE)
         self.assertRaises(ValueError, re.compile, '(?au)\w')
 
+    def test_locale_flag(self):
+        import locale
+        _, enc = locale.getlocale(locale.LC_CTYPE)
+        # Search non-ASCII letter
+        for i in range(128, 256):
+            try:
+                c = bytes([i]).decode(enc)
+                sletter = c.lower()
+                if sletter == c: continue
+                bletter = sletter.encode(enc)
+                if len(bletter) != 1: continue
+                if bletter.decode(enc) != sletter: continue
+                bpat = re.escape(bytes([i]))
+                break
+            except (UnicodeError, TypeError):
+                pass
+        else:
+            bletter = None
+            bpat = b'A'
+        # Bytes patterns
+        pat = re.compile(bpat, re.LOCALE | re.IGNORECASE)
+        if bletter:
+            self.assertTrue(pat.match(bletter))
+        pat = re.compile(b'(?L)' + bpat, re.IGNORECASE)
+        if bletter:
+            self.assertTrue(pat.match(bletter))
+        pat = re.compile(bpat, re.IGNORECASE)
+        if bletter:
+            self.assertIsNone(pat.match(bletter))
+        pat = re.compile(b'\w', re.LOCALE)
+        if bletter:
+            self.assertTrue(pat.match(bletter))
+        pat = re.compile(b'(?L)\w')
+        if bletter:
+            self.assertTrue(pat.match(bletter))
+        pat = re.compile(b'\w')
+        if bletter:
+            self.assertIsNone(pat.match(bletter))
+        # Incompatibilities
+        self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
+        self.assertWarns(DeprecationWarning, re.compile, '(?L)')
+        self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
+        self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
+        self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
+        self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+
     def test_bug_6509(self):
         # Replacement strings of both types must parse properly.
         # all strings
@@ -1477,6 +1522,10 @@ class PatternReprTests(unittest.TestCase):
         self.check_flags(b'bytes pattern', re.A,
                          "re.compile(b'bytes pattern', re.ASCII)")
 
+    def test_locale(self):
+        self.check_flags(b'bytes pattern', re.L,
+                         "re.compile(b'bytes pattern', re.LOCALE)")
+
     def test_quotes(self):
         self.check('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')''')
@@ -1590,8 +1639,16 @@ class ExternalTests(unittest.TestCase):
                     pass
                 else:
                     with self.subTest('bytes pattern match'):
-                        bpat = re.compile(bpat)
-                        self.assertTrue(bpat.search(bs))
+                        obj = re.compile(bpat)
+                        self.assertTrue(obj.search(bs))
+
+                    # Try the match with LOCALE enabled, and check that it
+                    # still succeeds.
+                    with self.subTest('locale-sensitive match'):
+                        obj = re.compile(bpat, re.LOCALE)
+                        result = obj.search(bs)
+                        if result is None:
+                            print('=== Fails on locale-sensitive match', t)
 
                 # Try the match with the search area limited to the extent
                 # of the match and see if it still succeeds.  \B will
@@ -1609,13 +1666,6 @@ class ExternalTests(unittest.TestCase):
                     obj = re.compile(pattern, re.IGNORECASE)
                     self.assertTrue(obj.search(s))
 
-                # Try the match with LOCALE enabled, and check that it
-                # still succeeds.
-                if '(?u)' not in pattern:
-                    with self.subTest('locale-sensitive match'):
-                        obj = re.compile(pattern, re.LOCALE)
-                        self.assertTrue(obj.search(s))
-
                 # Try the match with UNICODE locale enabled, and check
                 # that it still succeeds.
                 with self.subTest('unicode-sensitive match'):
index d3a02a6feb9cb5dae8b0ed757675319a3e63ae38..faac092406953ac8d333d20f09a19f68315f6107 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -191,6 +191,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or
+  re.ASCII. It never worked.
+
 - Issue #22902: The "ip" command is now used on Linux to determine MAC address
   in uuid.getnode().  Patch by Bruno Cauet.