granicus.if.org Git - python/commitdiff
Applied patch #725106, by Greg Chapman, fixing capturing groups
author Gustavo Niemeyer <gustavo@niemeyer.net>
Sun, 27 Apr 2003 12:34:14 +0000 (12:34 +0000)
committer Gustavo Niemeyer <gustavo@niemeyer.net>
Sun, 27 Apr 2003 12:34:14 +0000 (12:34 +0000)
within repeats of alternatives. The only change to the original
patch was to convert the tests to the new test_re.py file.

This patch fixes cases like:

>>> re.match('((a)|b)*', 'abc').groups()
('b', '')

This is wrong (it is impossible for group 2 to match the empty string)
and is incompatible with other regex systems, as the following
examples show:

% perl -e '"abc" =~ /^((a)|b)*/; print "$1 $2\n";'
b a

% echo "abc" | sed -r -e "s/^((a)|b)*/\1 \2|/"
b a|c

Lib/test/test_re.py
Modules/_sre.c

index 2430790301315fc00bce16490506a47eaca72b0d..7ba9a1b1f92fcd7e785b1fa17083e4c4e24c2c41 100644 (file)
@@ -276,6 +276,25 @@ class ReTests(unittest.TestCase):
             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
                              ('a:', 'a'))
 
+    def test_bug_725106(self):
+        # capturing groups in alternatives in repeats
+        self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
+                         ('b', 'a'))
+        self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
+                         ('c', 'b'))
+        self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+        self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
+                         ('b', None))
+
     def test_finditer(self):
         iter = re.finditer(r":+", "a:b::c:::d")
         self.assertEqual([item.group(0) for item in iter],
index 3f17d13c5dc4d4b33d5c8708c8707ed07027191c..b9e1827f831806d89709affdb837e06df83e1b31 100644 (file)
@@ -947,10 +947,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
                 if (pattern[1] == SRE_OP_IN &&
                     (ptr >= end || !SRE_CHARSET(pattern + 3, (SRE_CODE) *ptr)))
                     continue;
+                if (state->repeat) {
+                    i = mark_save(state, 0, lastmark);
+                    if (i < 0)
+                        return i;
+                }
                 state->ptr = ptr;
                 i = SRE_MATCH(state, pattern + 1, level + 1);
                 if (i)
                     return i;
+                if (state->repeat) {
+                    i = mark_restore(state, 0, lastmark);
+                    if (i < 0)
+                        return i;
+                }
                 LASTMARK_RESTORE();
             }
             return 0;