granicus.if.org Git - python/commitdiff
- make sure group names are valid identifiers
author: Fredrik Lundh <fredrik@pythonware.com>
Thu, 29 Jun 2000 12:38:45 +0000 (12:38 +0000)
committer: Fredrik Lundh <fredrik@pythonware.com>
Thu, 29 Jun 2000 12:38:45 +0000 (12:38 +0000)
  (closes the "SRE: symbolic reference" bug)

Lib/sre_parse.py

index 9f5386e3462d709c31fb6baa9031806173148f0a..8ab36c8698453906e86ae840f812be884c881933 100644 (file)
@@ -168,6 +168,24 @@ class Tokenizer:
        self.next = self.__next()
        return this
 
+def isident(char):
+    # true if char is an ASCII letter or an underscore, i.e. a
+    # character that may appear anywhere in a group name
+    lowercase = "a" <= char <= "z"
+    uppercase = "A" <= char <= "Z"
+    return lowercase or uppercase or char == "_"
+
+def isdigit(char):
+    # true if char is one of the ASCII decimal digits "0".."9"
+    return "0" <= char and char <= "9"
+
+def isname(name):
+    # check that group name is a valid identifier: returns 1 if name
+    # is non-empty, starts with a letter or underscore, and continues
+    # with letters, underscores or digits; returns 0 otherwise
+    # FIXME: <fl> this should use a regular expression instead, but
+    # that seems to run into bootstrapping problems here ;-)
+    if not name:
+        # an empty name has no first character to inspect; treat it as
+        # invalid instead of letting name[0] raise IndexError
+        return 0
+    if not isident(name[0]):
+        return 0
+    # the first character is already validated; the rest may be digits too
+    for char in name[1:]:
+        if not isident(char) and not isdigit(char):
+            return 0
+    return 1
+
 def _group(escape, state):
     # check if the escape string represents a valid group
     try:
@@ -418,9 +436,10 @@ def _parse(source, state, flags=0):
                                raise error, "unterminated name"
                            if char == ">":
                                break
-                           # FIXME: check for valid character
                            name = name + char
                        group = 1
+                       if not isname(name):
+                           raise error, "illegal character in group name"
                    elif source.match("="):
                        # named backreference
                        raise error, "not yet implemented"
@@ -522,20 +541,21 @@ def parse_template(source, pattern):
                    while 1:
                        char = s.get()
                        if char is None:
-                           raise error, "unterminated index"
+                           raise error, "unterminated group name"
                        if char == ">":
                            break
-                       # FIXME: check for valid character
                        name = name + char
                if not name:
-                   raise error, "bad index"
+                   raise error, "bad group name"
                try:
                    index = int(name)
                except ValueError:
+                   if not isname(name):
+                       raise error, "illegal character in group name"
                    try:
                        index = pattern.groupindex[name]
                    except KeyError:
-                       raise IndexError, "unknown index"
+                       raise IndexError, "unknown group name"
                a((MARK, index))
            elif len(this) > 1 and this[1] in DIGITS:
                while s.next in DIGITS: