Jeffrey's newest

author Guido van Rossum <guido@python.org>

Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)

committer Guido van Rossum <guido@python.org>

Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)
author Guido van Rossum <guido@python.org>
Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)
committer Guido van Rossum <guido@python.org>
Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)
diff --git a/Lib/re.py b/Lib/re.py

index 2d24da58558312c80bd9bcadeb62b74ca6e34403..b701bb67fbff3423fc8fa7358180ce45f8e0e46a 100644 (file)
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -153,9 +153,7 @@ class MatchObject:
                     g = self.re.groupindex[g]
                 except (KeyError, TypeError):
                     raise IndexError, ('group "' + g + '" is undefined')
-           if g >= len(self.regs):
-               result.append(None)
-           elif (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
+           if (self.regs[g][0] == -1) or (self.regs[g][1] == -1):
                 result.append(None)
             else:
                 result.append(self.string[self.regs[g][0]:self.regs[g][1]])
@@ -525,6 +523,186 @@ def build_fastmap(code, pos=0):
  #
  #
  
+# Contexts in which expand_escape() can be asked to expand an escape.
+[NORMAL, CHARCLASS, REPLACEMENT] = range(3)
+
+# Escape types returned by expand_escape().  NOT_SYNTAX is listed last so
+# that the values of the other types stay as they were.
+[CHAR, MEMORY_REFERENCE, SYNTAX, SET, WORD_BOUNDARY, NOT_WORD_BOUNDARY,
+ BEGINNING_OF_BUFFER, END_OF_BUFFER, NOT_SYNTAX] = range(9)
+
+# Escapes that name a syntax class, as letter: (class name, negated).
+# negated is true for the escapes that match everything outside the class.
+_syntax_escapes = {
+    'w': ('word', 0),
+    'W': ('word', 1),
+    's': ('whitespace', 0),
+    'S': ('whitespace', 1),
+    'd': ('digit', 0),
+    'D': ('digit', 1),
+    }
+
+def expand_escape(pattern, index, context=NORMAL):
+    """Expand the escape whose first character is pattern[index].
+
+    index is the position just after the backslash that introduced the
+    escape.  context is one of NORMAL, CHARCLASS or REPLACEMENT:
+
+    - b, B, A and Z are assertions in NORMAL context; in any other
+      context b is a backspace and B, A and Z are literal letters.
+    - w, W, s, S, d and D name a syntax class in NORMAL context, expand
+      to the list of matching characters in CHARCLASS context and are
+      literal letters otherwise.
+    - a reference to a register is refused in CHARCLASS context.
+
+    Every other character that has no special meaning is returned as
+    itself.
+
+    Returns a tuple (escape_type, value, next_index).  escape_type is
+    one of the escape type constants; value is its payload: the
+    character for CHAR, the register number for MEMORY_REFERENCE, the
+    syntax class name for SYNTAX and NOT_SYNTAX, a list of characters
+    for SET and '' for the boundary and buffer assertions.  next_index
+    is the position of the first character after the escape.
+
+    Raises error when the escape is incomplete, unsupported or not
+    allowed in the given context.
+    """
+    if index >= len(pattern):
+       raise error, 'escape ends too soon'
+
+    ch = pattern[index]
+
+    # escapes that stand for a single fixed character
+    if ch == 't':
+       return CHAR, chr(9), index + 1
+
+    elif ch == 'n':
+       return CHAR, chr(10), index + 1
+
+    elif ch == 'r':
+       return CHAR, chr(13), index + 1
+
+    elif ch == 'f':
+       return CHAR, chr(12), index + 1
+
+    elif ch == 'a':
+       return CHAR, chr(7), index + 1
+
+    elif ch == 'e':
+       return CHAR, chr(27), index + 1
+
+    elif ch == 'c':
+       # control character: a lower case letter maps to 1..26, any
+       # other character has bit 64 flipped
+       if index + 1 >= len(pattern):
+           raise error, '\\c must be followed by another character'
+       elif pattern[index + 1] in 'abcdefghijklmnopqrstuvwxyz':
+           return CHAR, chr(ord(pattern[index + 1]) - ord('a') + 1), index + 2
+       else:
+           return CHAR, chr(ord(pattern[index + 1]) ^ 64), index + 2
+
+    elif ch == 'x':
+       # CAUTION: this is the Python rule, not the Perl rule!
+       # pattern[index] is the 'x' itself, so the hex digits start one
+       # position later
+       end = index + 1
+       while (end < len(pattern)) and (pattern[end] in string.hexdigits):
+           end = end + 1
+       if end == index + 1:
+           raise error, "\\x must be followed by hex digit(s)"
+       # let Python evaluate it, so we don't incorrectly 2nd-guess
+       # what it's doing (and Python in turn passes it on to sscanf,
+       # so that *it* doesn't incorrectly 2nd-guess what C does!)
+       # Only the scanned hex digits are spliced into the literal, so
+       # nothing else from the pattern can reach eval.
+       char = eval ('"\\x' + pattern[index + 1:end] + '"')
+       assert len(char) == 1
+       return CHAR, char, end
+
+    # b, B, A and Z are assertions in NORMAL context only
+    elif ch == 'b':
+       if context != NORMAL:
+           return CHAR, chr(8), index + 1
+       else:
+           return WORD_BOUNDARY, '', index + 1
+
+    elif ch == 'B':
+       if context != NORMAL:
+           return CHAR, 'B', index + 1
+       else:
+           return NOT_WORD_BOUNDARY, '', index + 1
+
+    elif ch == 'A':
+       if context != NORMAL:
+           return CHAR, 'A', index + 1
+       else:
+           return BEGINNING_OF_BUFFER, '', index + 1
+
+    elif ch == 'Z':
+       if context != NORMAL:
+           return CHAR, 'Z', index + 1
+       else:
+           return END_OF_BUFFER, '', index + 1
+
+    elif ch in 'GluLUQE':
+       # some perl-isms that we don't support
+       raise error, ('\\' + ch + ' is not allowed')
+
+    elif _syntax_escapes.has_key(ch):
+       # w, W, s, S, d and D share one implementation
+       name, negated = _syntax_escapes[ch]
+       if context == NORMAL:
+           if negated:
+               return NOT_SYNTAX, name, index + 1
+           return SYNTAX, name, index + 1
+       elif context == CHARCLASS:
+           # collect the characters inside the class, or the ones
+           # outside it for the negated escapes
+           chars = []
+           for char in syntax_table.keys():
+               if (name in syntax_table[char]) != negated:
+                   chars.append(char)
+           return SET, chars, index + 1
+       else:
+           return CHAR, ch, index + 1
+
+    elif ch in '0123456789':
+       # three digits, or two starting with 0, are an octal character;
+       # a lone 0 is NUL; one or two other digits name a register
+       end = index
+       while (end < len(pattern)) and (pattern[end] in string.digits):
+           end = end + 1
+       value = pattern[index:end]
+
+       if (len(value) == 3) or ((len(value) == 2) and (value[0] == '0')):
+           # octal character value
+           value = string.atoi(value, 8)
+           if value > 255:
+               raise error, 'octal char out of range'
+           return CHAR, chr(value), end
+
+       elif value == '0':
+           return CHAR, chr(0), end
+
+       elif len(value) > 3:
+           raise error, ('\\' + value + ' has too many digits')
+
+       else:
+           # \1-\99 - reference a register
+           if context == CHARCLASS:
+               raise error, ('cannot reference a register from '
+                             'inside a character class')
+           value = string.atoi(value)
+           if value == 0:
+               raise error, ('register 0 cannot be used '
+                             'during match')
+           return MEMORY_REFERENCE, value, end
+
+    else:
+       # any other character stands for itself
+       return CHAR, ch, index + 1
+
  def compile(pattern, flags=0):
      stack = []
      index = 0
@@ -536,118 +714,50 @@ def compile(pattern, flags=0):
         char = pattern[index]
         index = index + 1
         if char == '\\':
-           if index < len(pattern):
-               next = pattern[index]
-               index = index + 1
-               if next == 't':
-                   stack.append([Exact(chr(9))])
-
-               elif next == 'n':
-                   stack.append([Exact(chr(10))])
-
-               elif next == 'r':
-                   stack.append([Exact(chr(13))])
-
-               elif next == 'f':
-                   stack.append([Exact(chr(12))])
-
-               elif next == 'a':
-                   stack.append([Exact(chr(7))])
-
-               elif next == 'e':
-                   stack.append([Exact(chr(27))])
+           escape_type, value, index = expand_escape(pattern, index)
  
-               elif next in '0123456789':
-                   value = next
-                   while (index < len(pattern)) and \
-                         (pattern[index] in string.digits):
-                       value = value + pattern[index]
-                       index = index + 1
-                   if (len(value) == 3) or \
-                      ((len(value) == 2) and (value[0] == '0')):
-                       value = string.atoi(value, 8)
-                       if value > 255:
-                           raise error, 'octal char out of range'
-                       stack.append([Exact(chr(value))])
-                   elif value == '0':
-                       stack.append([Exact(chr(0))])
-                   elif len(value) > 3:
-                       raise error, 'too many digits'
-                   else:
-                       value = string.atoi(value)
-                       if value >= register:
-                           raise error, ('cannot reference a register '
-                                         'not yet used')
-                       elif value == 0:
-                           raise error, ('register 0 cannot be used '
-                                         'during match')
-                       stack.append([MatchMemory(value)])
-
-               elif next == 'x':
-                   value = ''
-                   while (index < len(pattern)) and \
-                         (pattern[index] in string.hexdigits):
-                       value = value + pattern[index]
-                       index = index + 1
-                   value = string.atoi(value, 16)
-                   if value > 255:
-                       raise error, 'hex char out of range'
-                   stack.append([Exact(chr(value))])
-
-               elif next == 'c':
-                   if index >= len(pattern):
-                       raise error, '\\c at end of re'
-                   elif pattern[index] in 'abcdefghijklmnopqrstuvwxyz':
-                       stack.append(Exact(chr(ord(pattern[index]) -
-                                              ord('a') + 1)))
-                   else:
-                       stack.append(Exact(chr(ord(pattern[index]) ^ 64)))
-                   index = index + 1
-                   
-               elif next == 'A':
-                   stack.append([BegBuf()])
-
-               elif next == 'Z':
-                   stack.append([EndBuf()])
-
-               elif next == 'b':
-                   stack.append([WordBound()])
-
-               elif next == 'B':
-                   stack.append([NotWordBound()])
-
-               elif next == 'w':
-                   stack.append([SyntaxSpec('word')])
-
-               elif next == 'W':
-                   stack.append([NotSyntaxSpec('word')])
-
-               elif next == 's':
-                   stack.append([SyntaxSpec('whitespace')])
-
-               elif next == 'S':
-                   stack.append([NotSyntaxSpec('whitespace')])
-
-               elif next == 'd':
-                   stack.append([SyntaxSpec('digit')])
-
-               elif next == 'D':
-                   stack.append([NotSyntaxSpec('digit')])
-
-               elif next in 'GluLUQE':
-                   # some perl-isms that we don't support
-                   raise error, '\\' + next + ' not supported'
+           if escape_type == CHAR:
+               stack.append([Exact(value)])
                 
-               else:
-                   stack.append([Exact(pattern[index])])
-
+           elif escape_type == MEMORY_REFERENCE:
+               if value >= register:
+                   raise error, ('cannot reference a register '
+                                 'not yet used')
+               stack.append([MatchMemory(value)])
+               
+           elif escape_type == BEGINNING_OF_BUFFER:
+               stack.append([BegBuf()])
+               
+           elif escape_type == END_OF_BUFFER:
+               stack.append([EndBuf()])
+               
+           elif escape_type == WORD_BOUNDARY:
+               stack.append([WordBound()])
+               
+           elif escape_type == NOT_WORD_BOUNDARY:
+               stack.append([NotWordBound()])
+               
+           elif escape_type == SYNTAX:
+               stack.append([SyntaxSpec(value)])
+               
+           elif escape_type == NOT_SYNTAX:
+               stack.append([NotSyntaxSpec(value)])
+               
+           elif escape_type == SET:
+               raise error, 'cannot use set escape type here'
+           
             else:
-               raise error, 'backslash at the end of a string'
+               raise error, 'unknown escape type'
  
         elif char == '|':
             if len(stack) == 0:
-               raise error, 'nothing to alternate'
+               raise error, 'alternate with nothing on the left'
+           if stack[-1][0].name == '(':
+               raise error, 'alternate with nothing on the left in the group'
+           if stack[-1][0].name == '|':
+               raise error, 'alternates with nothing inbetween them'
             expr = []
+           
             while (len(stack) != 0) and \
                   (stack[-1][0].name != '(') and \
                   (stack[-1][0].name != '|'):
@@ -775,12 +885,13 @@ def compile(pattern, flags=0):
  
             if len(stack) == 0:
                 raise error, 'too many close parens'
+           
             if len(expr) == 0:
                 raise error, 'nothing inside parens'
  
             # check to see if alternation used correctly
             if (expr[-1].name == '|'):
-               raise error, 'alternation with nothing on the right'
+               raise error, 'alternate with nothing on the right'
  
             # remove markers left by alternation
             expr = filter(lambda x: x.name != '|', expr)
@@ -789,7 +900,7 @@ def compile(pattern, flags=0):
             need_label = 0
             for i in range(len(expr)):
                 if (expr[i].name == 'jump') and (expr[i].label == -1):
-                   expr[i] = JumpOpcode(label)
+                   expr[i] = Jump(label)
                     need_label = 1
             if need_label:
                 expr.append(Label(label))
@@ -1033,7 +1144,7 @@ def compile(pattern, flags=0):
                 stack.append([Exact(char)])
  
         elif char in string.whitespace:
-           if flags & VERBOSE:
+           if not (flags & VERBOSE):
                 stack.append([Exact(char)])
  
         elif char == '[':
@@ -1042,28 +1153,44 @@ def compile(pattern, flags=0):
             negate = 0
             last = ''
             set = []
+
             if pattern[index] == '^':
                 negate = 1
                 index = index + 1
             if index >= len(pattern):
                 raise error, 'incomplete set'
-           if pattern[index] in ']-':
-               set.append(pattern[index])
-               last = pattern[index]
-               index = index + 1
+
             while (index < len(pattern)) and (pattern[index] != ']'):
                 next = pattern[index]
                 index = index + 1
                 if next == '-':
+                   if last == '':
+                       raise error, 'improper use of range in character set'
+
+                   start = last
+                   
                     if (index >= len(pattern)) or (pattern[index] == ']'):
                         raise error, 'incomplete range in set'
-                   if last > pattern[index]:
+                   
+                   if pattern[index] == '\\':
+                       escape_type, value, index = expand_escape(pattern,
+                                                                 index + 1,
+                                                                 CHARCLASS)
+
+                       if escape_type == CHAR:
+                           end = value
+                       else:
+                           raise error, ('illegal escape in character '
+                                         'class range')
+                   else:
+                       end = pattern[index]
+                       
+                   if start > end:
                         raise error, 'range arguments out of order in set'
-                   for next in map(chr, \
-                                   range(ord(last), \
-                                         ord(pattern[index]) + 1)):
-                       if next not in set:
-                           set.append(next)
+                   for char in map(chr, range(ord(start), ord(end) + 1)):
+                       if char not in set:
+                           set.append(char)
+                           
                     last = ''
                     index = index + 1
  
@@ -1071,42 +1198,30 @@ def compile(pattern, flags=0):
                     # expand syntax meta-characters and add to set
                     if index >= len(pattern):
                         raise error, 'incomplete set'
-                   elif (pattern[index] == ']'):
-                       raise error, 'backslash at the end of a set'
-                   elif pattern[index] == 'w':
-                       for next in syntax_table.keys():
-                           if 'word' in syntax_table[next]:
-                               set.append(next)
-                   elif pattern[index] == 'W':
-                       for next in syntax_table.keys():
-                           if 'word' not in syntax_table[next]:
-                               set.append(next)
-                   elif pattern[index] == 'd':
-                       for next in syntax_table.keys():
-                           if 'digit' in syntax_table[next]:
-                               set.append(next)
-                   elif pattern[index] == 'D':
-                       for next in syntax_table.keys():
-                           if 'digit' not in syntax_table[next]:
-                               set.append(next)
-                   elif pattern[index] == 's':
-                       for next in syntax_table.keys():
-                           if 'whitespace' in syntax_table[next]:
-                               set.append(next)
-                   elif pattern[index] == 'S':
-                       for next in syntax_table.keys():
-                           if 'whitespace' not in syntax_table[next]:
-                               set.append(next)
+
+                   escape_type, value, index = expand_escape(pattern,
+                                                             index,
+                                                             CHARCLASS)
+
+                   if escape_type == CHAR:
+                       set.append(value)
+                       last = value
+
+                   elif escape_type == SET:
+                       for char in value:
+                           if char not in set:
+                               set.append(char)
+                       last = ''
+
                     else:
-                       raise error, 'unknown meta in set'
-                   last = ''
-                   index = index + 1
+                       raise error, 'illegal escape type in character class'
  
                 else:
                     if next not in set:
                         set.append(next)
                     last = next
-           if pattern[index] != ']':
+                   
+           if (index >= len(pattern)) or ( pattern[index] != ']'):
                 raise error, 'incomplete set'
  
             index = index + 1
@@ -1116,8 +1231,12 @@ def compile(pattern, flags=0):
                 for char in map(chr, range(256)):
                     if char not in set:
                         notset.append(char)
+               if len(notset) == 0:
+                   raise error, 'empty negated set'
                 stack.append([Set(notset)])
             else:
+               if len(set) == 0:
+                   raise error, 'empty set'
                 stack.append([Set(set)])
  
         else:
@@ -1132,7 +1251,7 @@ def compile(pattern, flags=0):
      if len(code) == 0:
         raise error, 'no code generated'
      if (code[-1].name == '|'):
-       raise error, 'alternation with nothing on the right'
+       raise error, 'alternate with nothing on the right'
      code = filter(lambda x: x.name != '|', code)
      need_label = 0
      for i in range(len(code)):
diff --git a/Lib/test/output/test_re b/Lib/test/output/test_re

index 56a225cadeb993a5bae4bc8eee291d0c263ebba9..7ba8cfa4af1ee8011662c4daf7c61a8c3d5c6df1 100644 (file)
--- a/Lib/test/output/test_re
+++ b/Lib/test/output/test_re
@@ -34,21 +34,25 @@ test_re
  ('a[b-d]e', 'ace', 0, 'found', 'ace')
  ('a[b-d]', 'aac', 0, 'found', 'ac')
  ('a[-b]', 'a-', 0, 'found', 'a-')
+=== Syntax error: ('a[-b]', 'a-', 0, 'found', 'a-')
  ('a[b-]', 'a-', 2)
  ('a[]b', '-', 2)
-*** Unexpected error ***
  ('a[', '-', 2)
  ('a\\', '-', 2)
  ('abc)', '-', 2)
  ('(abc', '-', 2)
  ('a]', 'a]', 0, 'found', 'a]')
  ('a[]]b', 'a]b', 0, 'found', 'a]b')
+=== Syntax error: ('a[]]b', 'a]b', 0, 'found', 'a]b')
  ('a[^bc]d', 'aed', 0, 'found', 'aed')
  ('a[^bc]d', 'abd', 1)
  ('a[^-b]c', 'adc', 0, 'found', 'adc')
+=== Syntax error: ('a[^-b]c', 'adc', 0, 'found', 'adc')
  ('a[^-b]c', 'a-c', 1)
+=== Syntax error: ('a[^-b]c', 'a-c', 1)
  ('a[^]b]c', 'a]c', 1)
  ('a[^]b]c', 'adc', 0, 'found', 'adc')
+=== Failed incorrectly ('a[^]b]c', 'adc', 0, 'found', 'adc')
  ('\\ba\\b', 'a-', 0, '"-"', '-')
  ('\\ba\\b', '-a', 0, '"-"', '-')
  ('\\ba\\b', '-a-', 0, '"-"', '-')
@@ -64,125 +68,76 @@ test_re
  === Syntax error: ('a(b', 'a(b', 0, 'found+"-"+g1', 'a(b-None')
  ('a\\(*b', 'ab', 0, 'found', 'ab')
  ('a\\(*b', 'a((b', 0, 'found', 'a((b')
-=== Failed incorrectly ('a\\(*b', 'a((b', 0, 'found', 'a((b')
  ('a\\\\b', 'a\\b', 0, 'found', 'a\\b')
-=== Failed incorrectly ('a\\\\b', 'a\\b', 0, 'found', 'a\\b')
  ('((a))', 'abc', 0, 'found+"-"+g1+"-"+g2', 'a-a-a')
-=== grouping error ('((a))', 'abc', 0, 'found+"-"+g1+"-"+g2', 'a-a-a') 'a-None-None' should be 'a-a-a'
  ('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c')
-=== grouping error ('(a)b(c)', 'abc', 0, 'found+"-"+g1+"-"+g2', 'abc-a-c') 'abc-None-None' should be 'abc-a-c'
  ('a+b+c', 'aabbabc', 0, 'found', 'abc')
  ('(a+|b)*', 'ab', 0, 'found+"-"+g1', 'ab-b')
-*** Unexpected error ***
  ('(a+|b)+', 'ab', 0, 'found+"-"+g1', 'ab-b')
-*** Unexpected error ***
  ('(a+|b)?', 'ab', 0, 'found+"-"+g1', 'a-a')
-*** Unexpected error ***
  (')(', '-', 2)
  ('[^ab]*', 'cde', 0, 'found', 'cde')
  ('abc', '', 1)
  ('a*', '', 0, 'found', '')
  ('a|b|c|d|e', 'e', 0, 'found', 'e')
  ('(a|b|c|d|e)f', 'ef', 0, 'found+"-"+g1', 'ef-e')
-*** Unexpected error ***
  ('abcd*efg', 'abcdefg', 0, 'found', 'abcdefg')
  ('ab*', 'xabyabbbz', 0, 'found', 'ab')
  ('ab*', 'xayabbbz', 0, 'found', 'a')
  ('(ab|cd)e', 'abcde', 0, 'found+"-"+g1', 'cde-cd')
-*** Unexpected error ***
  ('[abhgefdc]ij', 'hij', 0, 'found', 'hij')
  ('^(ab|cd)e', 'abcde', 1, 'xg1y', 'xy')
-*** Unexpected error ***
  ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
  === Syntax error: ('(abc|)ef', 'abcdef', 0, 'found+"-"+g1', 'ef-')
  ('(a|b)c*d', 'abcd', 0, 'found+"-"+g1', 'bcd-b')
-*** Unexpected error ***
  ('(ab|ab*)bc', 'abc', 0, 'found+"-"+g1', 'abc-a')
-*** Unexpected error ***
  ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc')
-=== grouping error ('a([bc]*)c*', 'abc', 0, 'found+"-"+g1', 'abc-bc') 'abc-None' should be 'abc-bc'
  ('a([bc]*)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d')
-=== grouping error ('a([bc]*)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') 'abcd-None-None' should be 'abcd-bc-d'
  ('a([bc]+)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d')
-=== grouping error ('a([bc]+)(c*d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-bc-d') 'abcd-None-None' should be 'abcd-bc-d'
  ('a([bc]*)(c+d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-b-cd')
-=== grouping error ('a([bc]*)(c+d)', 'abcd', 0, 'found+"-"+g1+"-"+g2', 'abcd-b-cd') 'abcd-None-None' should be 'abcd-b-cd'
  ('a[bcd]*dcdcde', 'adcdcde', 0, 'found', 'adcdcde')
  ('a[bcd]+dcdcde', 'adcdcde', 1)
  ('(ab|a)b*c', 'abc', 0, 'found+"-"+g1', 'abc-ab')
-*** Unexpected error ***
  ('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d')
-=== grouping error ('((a)(b)c)(d)', 'abcd', 0, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d') 'None-None-None-None' should be 'abc-a-b-d'
  ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', 0, 'found', 'alpha')
  ('^a(bc+|b[eh])g|.h$', 'abh', 0, 'found+"-"+g1', 'bh-None')
-*** Unexpected error ***
  ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
-*** Unexpected error ***
  ('(bc+d$|ef*g.|h?i(j|k))', 'ij', 0, 'found+"-"+g1+"-"+g2', 'ij-ij-j')
-*** Unexpected error ***
  ('(bc+d$|ef*g.|h?i(j|k))', 'effg', 1)
-*** Unexpected error ***
  ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', 1)
-*** Unexpected error ***
  ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', 0, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None')
-*** Unexpected error ***
  ('(((((((((a)))))))))', 'a', 0, 'found', 'a')
  ('multiple words of text', 'uh-uh', 1)
  ('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words')
-=== Failed incorrectly ('multiple words', 'multiple words, yeah', 0, 'found', 'multiple words')
  ('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de')
-=== grouping error ('(.*)c(.*)', 'abcde', 0, 'found+"-"+g1+"-"+g2', 'abcde-ab-de') 'abcde-None-None' should be 'abcde-ab-de'
-('((.*), (.*))', '(a, b)', 0, 'g2+"-"+g1', 'b-a')
-=== grouping error ('((.*), (.*))', '(a, b)', 0, 'g2+"-"+g1', 'b-a') 'None-None' should be 'b-a'
+('\\((.*), (.*)\\)', '(a, b)', 0, 'g2+"-"+g1', 'b-a')
  ('[k]', 'ab', 1)
  ('a[-]?c', 'ac', 0, 'found', 'ac')
+=== Syntax error: ('a[-]?c', 'ac', 0, 'found', 'ac')
  ('(abc)\\1', 'abcabc', 0, 'g1', 'abc')
-=== grouping error ('(abc)\\1', 'abcabc', 0, 'g1', 'abc') 'None' should be 'abc'
  ('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc')
-=== grouping error ('([a-c]*)\\1', 'abcabc', 0, 'g1', 'abc') 'None' should be 'abc'
  ('^(.+)?B', 'AB', 0, 'g1', 'A')
-=== grouping error ('^(.+)?B', 'AB', 0, 'g1', 'A') 'None' should be 'A'
  ('(a+).\\1$', 'aaaaa', 0, 'found+"-"+g1', 'aaaaa-aa')
-=== grouping error ('(a+).\\1$', 'aaaaa', 0, 'found+"-"+g1', 'aaaaa-aa') 'aaaaa-None' should be 'aaaaa-aa'
  ('^(a+).\\1$', 'aaaa', 1)
  ('(abc)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc')
-=== grouping error ('(abc)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc'
  ('([a-c]+)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc')
-=== grouping error ('([a-c]+)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc'
  ('(a)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
-=== grouping error ('(a)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a'
  ('(a+)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
-=== grouping error ('(a+)\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a'
  ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
-=== grouping error ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a') 'aa-None' should be 'aa-a'
  ('(a).+\\1', 'aba', 0, 'found+"-"+g1', 'aba-a')
-=== grouping error ('(a).+\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') 'aba-None' should be 'aba-a'
  ('(a)ba*\\1', 'aba', 0, 'found+"-"+g1', 'aba-a')
-=== grouping error ('(a)ba*\\1', 'aba', 0, 'found+"-"+g1', 'aba-a') 'aba-None' should be 'aba-a'
  ('(aa|a)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a')
-*** Unexpected error ***
  ('(a|aa)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a')
-*** Unexpected error ***
  ('(a+)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a')
-=== grouping error ('(a+)a\\1$', 'aaa', 0, 'found+"-"+g1', 'aaa-a') 'aaa-None' should be 'aaa-a'
  ('([abc]*)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc')
-=== grouping error ('([abc]*)\\1', 'abcabc', 0, 'found+"-"+g1', 'abcabc-abc') 'abcabc-None' should be 'abcabc-abc'
  ('(a)(b)c|ab', 'ab', 0, 'found+"-"+g1+"-"+g2', 'ab-None-None')
  ('(a)+x', 'aaax', 0, 'found+"-"+g1', 'aaax-a')
-=== grouping error ('(a)+x', 'aaax', 0, 'found+"-"+g1', 'aaax-a') 'aaax-None' should be 'aaax-a'
  ('([ac])+x', 'aacx', 0, 'found+"-"+g1', 'aacx-c')
-=== grouping error ('([ac])+x', 'aacx', 0, 'found+"-"+g1', 'aacx-c') 'aacx-None' should be 'aacx-c'
  ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/')
-=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-None' should be 'd:msgs/tdir/sub1/-tdir/'
  ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', 0, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah')
-=== Failed incorrectly ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', 0, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah')
  ('([^N]*N)+', 'abNNxyzN', 0, 'found+"-"+g1', 'abNNxyzN-xyzN')
-=== grouping error ('([^N]*N)+', 'abNNxyzN', 0, 'found+"-"+g1', 'abNNxyzN-xyzN') 'abNNxyzN-None' should be 'abNNxyzN-xyzN'
  ('([^N]*N)+', 'abNNxyz', 0, 'found+"-"+g1', 'abNN-N')
-=== grouping error ('([^N]*N)+', 'abNNxyz', 0, 'found+"-"+g1', 'abNN-N') 'abNN-None' should be 'abNN-N'
  ('([abc]*)x', 'abcx', 0, 'found+"-"+g1', 'abcx-abc')
-=== grouping error ('([abc]*)x', 'abcx', 0, 'found+"-"+g1', 'abcx-abc') 'abcx-None' should be 'abcx-abc'
  ('([abc]*)x', 'abc', 1)
  ('([xyz]*)x', 'abcx', 0, 'found+"-"+g1', 'x-')
-=== grouping error ('([xyz]*)x', 'abcx', 0, 'found+"-"+g1', 'x-') 'x-None' should be 'x-'
  ('(a)+b|aac', 'aac', 0, 'found+"-"+g1', 'aac-None')
diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py

index fc1fd571b57320a035a6047a402f1692d07241f3..a43b4acc8d3ffbaee3886ee8c96c8fe025cd47f6 100755 (executable)
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -218,7 +218,7 @@ tests = [
   'found', 'multiple words'),
  ('(.*)c(.*)', 'abcde', SUCCEED,
   'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
-('((.*), (.*))', '(a, b)', SUCCEED,
+('\\((.*), (.*)\\)', '(a, b)', SUCCEED,
   'g2+"-"+g1', 'b-a'),
  ('[k]', 'ab', FAIL),
  ('a[-]?c', 'ac', SUCCEED,
diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c

index 2ac467f824b9c5a10faf2c05f26bd3b3e908cd8f..9b928f5beffd09ade80e487fbfd40cc3cf7739f6 100644 (file)
--- a/Modules/reopmodule.c
+++ b/Modules/reopmodule.c
@@ -87,13 +87,13 @@ reop_match(self, args)
         char *string;
         int fastmaplen, stringlen;
         int can_be_null, anchor, i;
-       int num_regs, flags, pos, result;
+       int flags, pos, result;
         struct re_pattern_buffer bufp;
         struct re_registers re_regs;
         
         if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                          &(bufp.buffer), &(bufp.allocated), 
-                        &num_regs, &flags, &can_be_null,
+                        &(bufp.num_registers), &flags, &can_be_null,
                          &(bufp.fastmap), &fastmaplen,
                          &anchor,
                          &string, &stringlen, 
@@ -106,10 +106,9 @@ reop_match(self, args)
         bufp.fastmap_accurate=1;
         bufp.can_be_null=can_be_null;
         bufp.uses_registers=1;
-       bufp.num_registers=num_regs;
         bufp.anchor=anchor;
         
-       for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
         
         result = re_match(&bufp, 
                           string, stringlen, pos, 
@@ -123,7 +122,7 @@ reop_match(self, args)
                 Py_INCREF(Py_None);
                 return Py_None;
         }
-       return makeresult(&re_regs, num_regs);
+       return makeresult(&re_regs, bufp.num_registers);
  }
  
  static PyObject *
@@ -134,13 +133,13 @@ reop_search(self, args)
         char *string;
         int fastmaplen, stringlen;
         int can_be_null, anchor, i;
-       int num_regs, flags, pos, result;
+       int flags, pos, result;
         struct re_pattern_buffer bufp;
         struct re_registers re_regs;
         
         if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                          &(bufp.buffer), &(bufp.allocated), 
-                        &num_regs, &flags, &can_be_null,
+                        &(bufp.num_registers), &flags, &can_be_null,
                          &(bufp.fastmap), &fastmaplen,
                          &anchor,
                          &string, &stringlen, 
@@ -153,10 +152,9 @@ reop_search(self, args)
         bufp.fastmap_accurate=1;
         bufp.can_be_null=can_be_null;
         bufp.uses_registers=1;
-       bufp.num_registers=1;
         bufp.anchor=anchor;
  
-       for(i=0; i<num_regs; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
         
         result = re_search(&bufp, 
                            string, stringlen, pos, stringlen-pos,
@@ -170,7 +168,7 @@ reop_search(self, args)
                 Py_INCREF(Py_None);
                 return Py_None;
         }
-       return makeresult(&re_regs, num_regs);
+       return makeresult(&re_regs, bufp.num_registers);
  }
  
  #if 0
author	Guido van Rossum <guido@python.org>
	Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)
committer	Guido van Rossum <guido@python.org>
	Tue, 15 Jul 1997 14:38:13 +0000 (14:38 +0000)
Lib/re.py		patch \| blob \| history
Lib/test/output/test_re		patch \| blob \| history
Lib/test/re_tests.py		patch \| blob \| history
Modules/reopmodule.c		patch \| blob \| history