]> granicus.if.org Git - python/commitdiff
SF patch #720991 by Gary Herron:
authorGuido van Rossum <guido@python.org>
Mon, 14 Apr 2003 17:59:34 +0000 (17:59 +0000)
committerGuido van Rossum <guido@python.org>
Mon, 14 Apr 2003 17:59:34 +0000 (17:59 +0000)
A small fix for bug #545855 and Greg Chapman's
addition of op code SRE_OP_MIN_REPEAT_ONE for
eliminating recursion on simple uses of pattern '*?' on a
long string.

Lib/sre_compile.py
Lib/sre_constants.py
Lib/sre_parse.py
Lib/test/test_sre.py
Misc/NEWS
Modules/_sre.c
Modules/sre_constants.h

index bb17649523ffd1146112143821783131724e8749..3e54819bc9d5f9cb671ef945281a840bdf9df524 100644 (file)
@@ -55,8 +55,11 @@ def _compile(code, pattern, flags):
                 _compile(code, av[2], flags)
                 emit(OPCODES[SUCCESS])
                 code[skip] = len(code) - skip
-            elif _simple(av) and op == MAX_REPEAT:
-                emit(OPCODES[REPEAT_ONE])
+            elif _simple(av) and op != REPEAT:
+                if op == MAX_REPEAT:
+                    emit(OPCODES[REPEAT_ONE])
+                else:
+                    emit(OPCODES[MIN_REPEAT_ONE])
                 skip = len(code); emit(0)
                 emit(av[0])
                 emit(av[1])
index b0b61d91bfccc0ee2cbc5bdcfddb78fd6ef17fcf..2cd85a39f9fb3c4330ea4cfc7646ec47ec95ee02 100644 (file)
@@ -60,6 +60,7 @@ RANGE = "range"
 REPEAT = "repeat"
 REPEAT_ONE = "repeat_one"
 SUBPATTERN = "subpattern"
+MIN_REPEAT_ONE = "min_repeat_one"
 
 # positions
 AT_BEGINNING = "at_beginning"
@@ -120,7 +121,8 @@ OPCODES = [
     RANGE,
     REPEAT,
     REPEAT_ONE,
-    SUBPATTERN
+    SUBPATTERN,
+    MIN_REPEAT_ONE
 
 ]
 
index 1b52967a586696f8880a8a334b9334f4a57b7861..fdf6767367f98df038ca0b97a288db5816b6759b 100644 (file)
@@ -419,7 +419,7 @@ def _parse(source, state):
                         set.append(code1)
                         set.append((LITERAL, ord("-")))
                         break
-                    else:
+                    elif this:
                         if this[0] == "\\":
                             code2 = _class_escape(source, this)
                         else:
@@ -431,6 +431,8 @@ def _parse(source, state):
                         if hi < lo:
                             raise error, "bad character range"
                         set.append((RANGE, (lo, hi)))
+                    else:
+                        raise error, "unexpected end of regular expression"
                 else:
                     if code1[0] is IN:
                         code1 = code1[1][0]
index 6a00affa369f641f8bb271fff9dbcd439ef66e03..e4eb08ddd714df4fdad8977f7566077636a4da80 100644 (file)
@@ -83,6 +83,19 @@ test(r"""sre.match(r'(a)?a','a').lastindex""", None)
 test(r"""sre.match(r'(a)(b)?b','ab').lastindex""", 1)
 test(r"""sre.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup""", 'a')
 
+# bug 545855 -- This pattern failed to cause a compile error as it
+# should, instead provoking a TypeError.
+test(r"""sre.compile('foo[a-')""", None, sre.error)
+
+# bugs 418626 at al. -- Testing Greg Chapman's addition of op code
+# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
+# pattern '*?' on a long string.
+test(r"""sre.match('.*?c', 10000*'ab'+'cd').end(0)""", 20001)
+test(r"""sre.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0)""", 20003)
+test(r"""sre.match('.*?cd', 20000*'abc'+'de').end(0)""", 60001)
+# non-simple '*?' still recurses and hits the recursion limit
+test(r"""sre.search('(a|b)*?c', 10000*'ab'+'cd').end(0)""", None, RuntimeError)
+
 if verbose:
     print 'Running tests on sre.sub'
 
index a85273a95a7d0970cf51f4d490bf4b62c439136c..054559bb346ad92168df8be78412bcf204b48121 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -54,6 +54,10 @@ Core and builtins
 Extension modules
 -----------------
 
+- The .*? pattern in the re module is now special-cased to avoid the
+  recursion limit.  (SF patch #720991 -- many thanks to Gary Herron
+  and Greg Chapman.)
+
 - New function sys.call_tracing() allows pdb to debug code
   recursively.
 
index 619d39be6eab7da7baed036f0b419e248245fe39..dde365b956b9c4e43f8ae4124b67068517515f7e 100644 (file)
@@ -993,6 +993,66 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             }
             return 0;
 
+        case SRE_OP_MIN_REPEAT_ONE:
+            /* match repeated sequence (minimizing regexp) */
+
+            /* this operator only works if the repeated item is
+               exactly one character wide, and we're not already
+               collecting backtracking points.  for other cases,
+               use the MIN_REPEAT operator */
+
+            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
+
+            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
+                   pattern[1], pattern[2]));
+
+            if (ptr + pattern[1] > end)
+                return 0; /* cannot match */
+
+            state->ptr = ptr;
+
+            if (pattern[1] == 0)
+                count = 0;
+            else {
+                /* count using pattern min as the maximum */
+                count = SRE_COUNT(state, pattern + 3, pattern[1], level + 1);
+
+                if (count < 0)
+                    return count;   /* exception */
+                if (count < (int) pattern[1])
+                    return 0;       /* did not match minimum number of times */ 
+                ptr += count;       /* advance past minimum matches of repeat */
+            }
+
+            if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
+                /* tail is empty.  we're finished */
+                state->ptr = ptr;
+                return 1;
+
+            } else {
+                /* general case */
+                int matchmax = ((int)pattern[2] == 65535);
+                int c;
+                lastmark = state->lastmark;
+                while (matchmax || count <= (int) pattern[2]) {
+                    state->ptr = ptr;
+                    i = SRE_MATCH(state, pattern + pattern[0], level + 1);
+                    if (i)
+                        return i;
+                    state->ptr = ptr;
+                    c = SRE_COUNT(state, pattern+3, 1, level+1);
+                    if (c < 0)
+                        return c;
+                    if (c == 0)
+                        break;
+                    assert(c == 1);
+                    ptr++;
+                    count++;
+                }
+                lastmark_restore(state, lastmark);
+            }
+            return 0;
+
         case SRE_OP_REPEAT:
             /* create repeat context.  all the hard work is done
                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
index ebb9fd03d2ad77ba0ebf9ce1fb22eff2a08bd9cf..540008e3754eadf08e75e2a29e766caf337f6e0c 100644 (file)
@@ -42,6 +42,7 @@
 #define SRE_OP_REPEAT 27
 #define SRE_OP_REPEAT_ONE 28
 #define SRE_OP_SUBPATTERN 29
+#define SRE_OP_MIN_REPEAT_ONE 30
 #define SRE_AT_BEGINNING 0
 #define SRE_AT_BEGINNING_LINE 1
 #define SRE_AT_BEGINNING_STRING 2