From 72b82ba16dea929b3fa9db5208b2353e8449c2d5 Mon Sep 17 00:00:00 2001
From: Fredrik Lundh <fredrik@pythonware.com>
Date: Mon, 3 Jul 2000 21:31:48 +0000
Subject: [PATCH] - fixed grouping error bug

- changed "group" operator to "groupref"
---
 Lib/sre_compile.py       | 19 +++++++++++-----
 Lib/sre_constants.py     |  8 +++----
 Lib/sre_parse.py         |  4 ++--
 Lib/test/output/test_sre |  3 ---
 Modules/_sre.c           | 49 +++++++++++++++++++++++++++-------------
 Modules/sre.h            |  3 +++
 6 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 701b267672..828b1702fe 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -148,18 +148,25 @@ def _compile(code, pattern, flags):
                     skip = len(code); emit(0)
                     emit(av[0])
                     emit(av[1])
+                    mark = MAXCODE
+                    if av[2][0][0] == SUBPATTERN:
+                        # repeated subpattern
+                        gid, foo = av[2][0][1]
+                        if gid:
+                            mark = (gid-1)*2
+                    emit(mark)
                     _compile(code, av[2], flags)
                     emit(OPCODES[SUCCESS])
                     code[skip] = len(code) - skip
         elif op is SUBPATTERN:
-            group = av[0]
-            if group:
+            gid = av[0]
+            if gid:
                 emit(OPCODES[MARK])
-                emit((group-1)*2)
+                emit((gid-1)*2)
             _compile(code, av[1], flags)
-            if group:
+            if gid:
                 emit(OPCODES[MARK])
-                emit((group-1)*2+1)
+                emit((gid-1)*2+1)
         elif op in (SUCCESS, FAILURE):
             emit(OPCODES[op])
         elif op in (ASSERT, ASSERT_NOT):
@@ -207,7 +214,7 @@ def _compile(code, pattern, flags):
                 emit(CHCODES[CH_UNICODE[av]])
             else:
                 emit(CHCODES[av])
-        elif op is GROUP:
+        elif op is GROUPREF:
             if flags & SRE_FLAG_IGNORECASE:
                 emit(OPCODES[OP_IGNORE[op]])
             else:
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 076637d86d..ef32c32bc3 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -29,8 +29,8 @@ BRANCH = "branch"
 CALL = "call"
 CATEGORY = "category"
 CHARSET = "charset"
-GROUP = "group"
-GROUP_IGNORE = "group_ignore"
+GROUPREF = "groupref"
+GROUPREF_IGNORE = "groupref_ignore"
 IN = "in"
 IN_IGNORE = "in_ignore"
 INDEX = "index"
@@ -90,7 +90,7 @@ OPCODES = [
     CALL,
     CATEGORY,
     CHARSET,
-    GROUP, GROUP_IGNORE,
+    GROUPREF, GROUPREF_IGNORE,
     INDEX,
     IN, IN_IGNORE,
     INFO,
@@ -136,7 +136,7 @@ CHCODES = makedict(CHCODES)
 
 # replacement operations for "ignore case" mode
 OP_IGNORE = {
-    GROUP: GROUP_IGNORE,
+    GROUPREF: GROUPREF_IGNORE,
     IN: IN_IGNORE,
     LITERAL: LITERAL_IGNORE,
     NOT_LITERAL: NOT_LITERAL_IGNORE
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 07ab78241f..053335a4d7 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -241,7 +241,7 @@ def _escape(source, escape, state):
                 if group:
                     if (not source.next or
                         not _group(escape + source.next, state.groups)):
-                        return GROUP, group
+                        return GROUPREF, group
                     escape = escape + source.get()
                 elif source.next in OCTDIGITS:
                     escape = escape + source.get()
@@ -450,7 +450,7 @@ def _parse(source, state):
                         gid = state.groupdict.get(name)
                         if gid is None:
                             raise error, "unknown group name"
-                        subpattern.append((GROUP, gid))
+                        subpattern.append((GROUPREF, gid))
                     elif source.match("#"):
                         index = ""
                         while 1:
diff --git a/Lib/test/output/test_sre b/Lib/test/output/test_sre
index 3ba209d329..d949f2551c 100644
--- a/Lib/test/output/test_sre
+++ b/Lib/test/output/test_sre
@@ -1,7 +1,4 @@
 test_sre
 === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
 === Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
-=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
-=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
-=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
 === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 764e155f97..cac480d780 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -406,6 +406,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 	int stackbase;
     int lastmark;
 	int i, count;
+    SRE_STACK* sp;
 
     /* FIXME: this is a hack! */
     void* mark_copy[SRE_MARK_SIZE];
@@ -571,8 +572,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 			/* set mark */
 			/* args: <mark> */
 			TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
-            if (state->lastmark < pattern[0])
-                state->lastmark = pattern[0];
+            if (state->lastmark < pattern[0]+1)
+                state->lastmark = pattern[0]+1;
             if (!mark) {
                 mark = mark_copy;
                 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
@@ -780,10 +781,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 #endif
 
 		case SRE_OP_MAX_REPEAT:
-			/* match repeated sequence (maximizing regexp).	 repeated
-			   group should end with a MAX_UNTIL code */
-
-            /* args: <skip> <min> <max> <item> */
+			/* match repeated sequence (maximizing regexp) */
+            /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
 
 			TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
 				   pattern[1], pattern[2]));
@@ -793,7 +792,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 
             /* match minimum number of items */
             while (count < (int) pattern[1]) {
-                i = SRE_MATCH(state, pattern + 3);
+                i = SRE_MATCH(state, pattern + 4);
                 if (i < 0)
                     return i;
                 if (!i)
@@ -817,8 +816,13 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
                points to the stack */
 
             while (pattern[2] == 65535 || count < (int) pattern[2]) {
+                void *mark0, *mark1;
+                if (pattern[3] != 65535) {
+                    mark0 = state->mark[pattern[3]];
+                    mark1 = state->mark[pattern[3]+1];
+                }
 				state->stackbase = stack;
-				i = SRE_MATCH(state, pattern + 3);
+				i = SRE_MATCH(state, pattern + 4);
 				state->stackbase = stackbase; /* rewind */
                 if (i < 0)
                     return i;
@@ -837,8 +841,14 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 						return i; /* out of memory */
 				}
                 TRACE(("%8d: stack[%d] = %d\n", PTR(ptr), stack, PTR(ptr)));
-				state->stack[stack].ptr = ptr;
-				state->stack[stack].pattern = pattern + pattern[0];
+                sp = state->stack + stack;
+				sp->ptr = ptr;
+				sp->pattern = pattern + pattern[0];
+                sp->mark = pattern[3];
+                if (pattern[3] != 65535) {
+                    sp->mark0 = mark0;
+                    sp->mark1 = mark1;
+                }
                 stack++;
 				/* move forward */
 				ptr = state->ptr;
@@ -855,13 +865,15 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 
 		case SRE_OP_MIN_REPEAT:
 			/* match repeated sequence (minimizing regexp) */
+            /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
+
 			TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
 				   pattern[1], pattern[2]));
 			count = 0;
 			state->ptr = ptr;
 			/* match minimum number of items */
 			while (count < (int) pattern[1]) {
-				i = SRE_MATCH(state, pattern + 3);
+				i = SRE_MATCH(state, pattern + 4);
 				if (i < 0)
                     return i;
                 if (!i)
@@ -877,7 +889,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 					goto success;
 				}
 				state->ptr = ptr; /* backtrack */
-				i = SRE_MATCH(state, pattern + 3);
+				i = SRE_MATCH(state, pattern + 4);
                 if (i < 0)
                     return i;
 				if (!i)
@@ -940,15 +952,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 	}
 
   failure:
+    TRACE(("%8d: leave (failure)\n", PTR(ptr)));
     if (stack-- > stackbase) {
-        ptr = state->stack[stack].ptr;
-        pattern = state->stack[stack].pattern;
+        sp = state->stack + stack;
+        ptr = sp->ptr;
+        pattern = sp->pattern;
+        if (sp->mark != 65535) {
+            state->mark[sp->mark] = sp->mark0;
+            state->mark[sp->mark+1] = sp->mark1;
+        }
         TRACE(("%8d: retry (%d)\n", PTR(ptr), stack));
         goto retry;
     }
-    TRACE(("%8d: leave (failure)\n", PTR(ptr)));
-    state->stackbase = stackbase;
     state->lastmark = lastmark;
+    state->stackbase = stackbase;
     if (mark)
         memcpy(state->mark, mark, state->lastmark*sizeof(void*));
     return 0;
diff --git a/Modules/sre.h b/Modules/sre.h
index d4e93dab29..1c4bf68728 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -46,6 +46,9 @@ typedef struct {
     /* stack elements */
     SRE_CODE* pattern;
     void* ptr;
+    int mark;
+    void* mark0;
+    void* mark1;
 } SRE_STACK;
 
 /* FIXME: <fl> shouldn't be a constant, really... */
-- 
2.40.0