]> granicus.if.org Git - python/commitdiff
Closes #14462: allow any valid Python identifier in sre group names, as documented.
author Georg Brandl <georg@python.org>
Sun, 14 Apr 2013 09:40:00 +0000 (11:40 +0200)
committer Georg Brandl <georg@python.org>
Sun, 14 Apr 2013 09:40:00 +0000 (11:40 +0200)
Lib/sre_parse.py
Lib/test/test_re.py

index b195fd01dc9bda21b655d3a1e8f0611df98e0a06..2ebce8947e01fa0bfbd87da7761124a9fca46881 100644 (file)
@@ -225,13 +225,25 @@ class Tokenizer:
     def seek(self, index):
         self.index, self.next = index
 
+# The following three functions are not used in this module anymore, but we keep
+# them here (with DeprecationWarnings) for backwards compatibility.
+
 def isident(char):
+    import warnings
+    warnings.warn('sre_parse.isident() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
 
 def isdigit(char):
+    import warnings
+    warnings.warn('sre_parse.isdigit() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     return "0" <= char <= "9"
 
 def isname(name):
+    import warnings
+    warnings.warn('sre_parse.isname() will be removed in 3.5',
+                  DeprecationWarning, stacklevel=2)
     # check that group name is a valid string
     if not isident(name[0]):
         return False
@@ -587,7 +599,7 @@ def _parse(source, state):
                         group = 1
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                     elif sourcematch("="):
                         # named backreference
@@ -601,7 +613,7 @@ def _parse(source, state):
                             name = name + char
                         if not name:
                             raise error("missing group name")
-                        if not isname(name):
+                        if not name.isidentifier():
                             raise error("bad character in group name")
                         gid = state.groupdict.get(name)
                         if gid is None:
@@ -655,7 +667,7 @@ def _parse(source, state):
                     group = 2
                     if not condname:
                         raise error("missing group name")
-                    if isname(condname):
+                    if condname.isidentifier():
                         condgroup = state.groupdict.get(condname)
                         if condgroup is None:
                             raise error("unknown group name")
@@ -792,7 +804,7 @@ def parse_template(source, pattern):
                     if index < 0:
                         raise error("negative group number")
                 except ValueError:
-                    if not isname(name):
+                    if not name.isidentifier():
                         raise error("bad character in group name")
                     try:
                         index = pattern.groupindex[name]
index ef19164ed818e90f7a6f5ea2bbed7efc29bd15ff..e90c770940ff4b143830c8e14d64dbefdeeb6aa9 100644 (file)
@@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
         self.assertRaises(re.error, re.compile, '(?(a))')
         self.assertRaises(re.error, re.compile, '(?(1a))')
         self.assertRaises(re.error, re.compile, '(?(a.))')
+        # New valid/invalid identifiers in Python 3
+        re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
+        re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
+        self.assertRaises(re.error, re.compile, '(?P<©>x)')
 
     def test_symbolic_refs(self):
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
+        # New valid/invalid identifiers in Python 3
+        self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
+        self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
 
     def test_re_subn(self):
         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))