]> granicus.if.org Git - python/commitdiff
-- SRE 0.9.6 sync. this includes:
authorFredrik Lundh <fredrik@pythonware.com>
Sun, 23 Jul 2000 21:46:17 +0000 (21:46 +0000)
committerFredrik Lundh <fredrik@pythonware.com>
Sun, 23 Jul 2000 21:46:17 +0000 (21:46 +0000)
 + added "regs" attribute
 + fixed "pos" and "endpos" attributes
 + reset "lastindex" and "lastgroup" in scanner methods
 + removed (?P#id) syntax; the "lastindex" and "lastgroup"
   attributes are now always set
 + removed string module dependencies in sre_parse
 + better debugging support in sre_parse
 + various tweaks to build under 1.5.2

Lib/sre.py
Lib/sre_compile.py
Lib/sre_constants.py
Lib/sre_parse.py
Lib/test/output/test_sre
Modules/_sre.c
Modules/sre.h

index fbf44a5f3b8d22bc34958fa3e8e3c434ffde580f..6dd1df9c2add716ed3ee03909ccfefdf3e26d18b 100644 (file)
 # other compatibility work.
 #
 
+# FIXME: change all FIXME's to XXX ;-)
+
 import sre_compile
 import sre_parse
 
+import string
+
 # flags
 I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
 L = LOCALE = sre_compile.SRE_FLAG_LOCALE
@@ -53,6 +57,9 @@ def findall(pattern, string, maxsplit=0):
 def compile(pattern, flags=0):
     return _compile(pattern, flags)
 
+def purge():
+    _cache.clear()
+
 def template(pattern, flags=0):
     return _compile(pattern, flags|T)
 
@@ -65,7 +72,7 @@ def escape(pattern):
                 s[i] = "\\000"
             else:
                 s[i] = "\\" + c
-    return pattern[:0].join(s)
+    return _join(s, pattern)
 
 # --------------------------------------------------------------------
 # internals
@@ -73,10 +80,14 @@ def escape(pattern):
 _cache = {}
 _MAXCACHE = 100
 
+def _join(seq, sep):
+    # internal: join into string having the same type as sep
+    return string.join(seq, sep[:0])
+
 def _compile(pattern, flags=0):
     # internal: compile pattern
     tp = type(pattern)
-    if tp not in (type(""), type(u"")):
+    if tp not in sre_compile.STRING_TYPES:
         return pattern
     key = (tp, pattern, flags)
     try:
@@ -89,10 +100,6 @@ def _compile(pattern, flags=0):
     _cache[key] = p
     return p
 
-def purge():
-    # clear pattern cache
-    _cache.clear()
-
 def _sub(pattern, template, string, count=0):
     # internal: pattern.sub implementation hook
     return _subn(pattern, template, string, count)[0]
@@ -120,7 +127,7 @@ def _subn(pattern, template, string, count=0):
         i = e
         n = n + 1
     append(string[i:])
-    return string[:0].join(s), n
+    return _join(s, string[:0]), n
 
 def _split(pattern, string, maxsplit=0):
     # internal: pattern.split implementation hook
@@ -161,11 +168,19 @@ copy_reg.pickle(type(_compile("")), _pickle, _compile)
 
 class Scanner:
     def __init__(self, lexicon):
+        from sre_constants import BRANCH, SUBPATTERN, INDEX
         self.lexicon = lexicon
+        # combine phrases into a compound pattern
         p = []
+        s = sre_parse.Pattern()
         for phrase, action in lexicon:
-            p.append("(?:%s)(?P#%d)" % (phrase, len(p)))
-        self.scanner = _compile("|".join(p))
+            p.append(sre_parse.SubPattern(s, [
+                (SUBPATTERN, (None, sre_parse.parse(phrase))),
+                (INDEX, len(p))
+                ]))
+        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
+        s.groups = len(p)
+        self.scanner = sre_compile.compile(p)
     def scan(self, string):
         result = []
         append = result.append
index d8d01eabef9a5547c0dcc5bc164c5a5065869580..ef26e1cab113630a1a830a19ffc6d020a421da15 100644 (file)
@@ -197,10 +197,11 @@ def _compile(code, pattern, flags):
             else:
                 emit(ATCODES[av])
         elif op is BRANCH:
-            emit(OPCODES[op])
             tail = []
             for av in av[1]:
+                emit(OPCODES[op])
                 skip = len(code); emit(0)
+                emit(MAXCODE) # save mark
                 _compile(code, av, flags)
                 emit(OPCODES[JUMP])
                 tail.append(len(code)); emit(0)
@@ -286,11 +287,18 @@ def _compile_info(code, pattern, flags):
         emit(OPCODES[FAILURE])
     code[skip] = len(code) - skip
 
+STRING_TYPES = [type("")]
+
+try:
+    STRING_TYPES.append(type(unicode("")))
+except NameError:
+    pass
+
 def compile(p, flags=0):
     # internal: convert pattern list to internal format
 
     # compile, as necessary
-    if type(p) in (type(""), type(u"")):
+    if type(p) in STRING_TYPES:
         import sre_parse
         pattern = p
         p = sre_parse.parse(p, flags)
@@ -308,6 +316,8 @@ def compile(p, flags=0):
 
     code.append(OPCODES[SUCCESS])
 
+    # print code
+
     # FIXME: <fl> get rid of this limitation!
     assert p.pattern.groups <= 100,\
            "sorry, but this version only supports 100 named groups"
index c2cecdf7ffe0bc2bb44233dccbcf422634297a44..ef32c32bc3aa35e9b2e85ea37b958fe694b8618c 100644 (file)
@@ -172,7 +172,7 @@ CH_UNICODE = {
 # flags
 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
 SRE_FLAG_IGNORECASE = 2 # case insensitive
-SRE_FLAG_LOCALE = 4 # honor system locale
+SRE_FLAG_LOCALE = 4 # honour system locale
 SRE_FLAG_MULTILINE = 8 # treat target as multiline string
 SRE_FLAG_DOTALL = 16 # treat target as a single string
 SRE_FLAG_UNICODE = 32 # use unicode locale
index 053335a4d7de86cd9a3056d13e58e7a3b01ed4bd..1b56352451de95c8c432efd7f50a5a07dc0beac9 100644 (file)
@@ -25,12 +25,12 @@ CHARMASK = 0xff
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS  = "*+?{"
 
-DIGITS = tuple(string.digits)
+DIGITS = tuple("012345689")
 
 OCTDIGITS = tuple("01234567")
 HEXDIGITS = tuple("0123456789abcdefABCDEF")
 
-WHITESPACE = tuple(string.whitespace)
+WHITESPACE = tuple(" \t\n\r\v\f")
 
 ESCAPES = {
     r"\a": (LITERAL, 7),
@@ -68,7 +68,8 @@ FLAGS = {
     "u": SRE_FLAG_UNICODE,
 }
 
-class State:
+class Pattern:
+    # master pattern object.  keeps track of global attributes
     def __init__(self):
         self.flags = 0
         self.groups = 1
@@ -88,6 +89,33 @@ class SubPattern:
             data = []
         self.data = data
         self.width = None
+    def dump(self, level=0):
+        nl = 1
+        for op, av in self.data:
+            print level*"  " + op,; nl = 0
+            if op == "in":
+                # member sublanguage
+                print; nl = 1
+                for op, a in av:
+                    print (level+1)*"  " + op, a
+            elif op == "branch":
+                print; nl = 1
+                i = 0
+                for a in av[1]:
+                    if i > 0:
+                        print level*"  " + "or"
+                    a.dump(level+1); nl = 1
+                    i = i + 1
+            elif type(av) in (type(()), type([])):
+                for a in av:
+                    if isinstance(a, SubPattern):
+                        if not nl: print
+                        a.dump(level+1); nl = 1
+                    else:
+                        print a, ; nl = 0
+            else:
+                print av, ; nl = 0
+            if not nl: print
     def __repr__(self):
         return repr(self.data)
     def __len__(self):
@@ -255,10 +283,25 @@ def _escape(source, escape, state):
         pass
     raise error, "bogus escape: %s" % repr(escape)
 
-def _branch(pattern, items):
-    # form a branch operator from a set of items
+def _parse_sub(source, state, nested=1):
+    # parse an alternation: a|b|c
 
-    subpattern = SubPattern(pattern)
+    items = []
+    while 1:
+        items.append(_parse(source, state))
+        if source.match("|"):
+            continue
+        if not nested:
+            break
+        if not source.next or source.match(")"):
+            break
+        else:
+            raise error, "pattern not properly closed"
+
+    if len(items) == 1:
+        return items[0]
+
+    subpattern = SubPattern(state)
 
     # check if all items share a common prefix
     while 1:
@@ -285,7 +328,7 @@ def _branch(pattern, items):
             break
     else:
         # we can store this as a character set instead of a
-        # branch (FIXME: use a range if possible)
+        # branch (the compiler may optimize this even more)
         set = []
         for item in items:
             set.append(item[0])
@@ -296,8 +339,7 @@ def _branch(pattern, items):
     return subpattern
 
 def _parse(source, state):
-
-    # parse regular expression pattern into an operator list.
+    # parse a simple pattern
 
     subpattern = SubPattern(state)
 
@@ -451,22 +493,6 @@ def _parse(source, state):
                         if gid is None:
                             raise error, "unknown group name"
                         subpattern.append((GROUPREF, gid))
-                    elif source.match("#"):
-                        index = ""
-                        while 1:
-                            char = source.get()
-                            if char is None:
-                                raise error, "unterminated index"
-                            if char == ")":
-                                break
-                            index = index + char
-                        try:
-                            index = int(index)
-                            if index < 0 or index > MAXREPEAT:
-                                raise ValueError
-                        except ValueError:
-                            raise error, "illegal index"
-                        subpattern.append((INDEX, index))
                         continue
                     else:
                         char = source.get()
@@ -491,48 +517,27 @@ def _parse(source, state):
                             raise error, "syntax error"
                         dir = -1 # lookbehind
                         char = source.get()
-                    b = []
-                    while 1:
-                        p = _parse(source, state)
-                        if source.next == ")":
-                            if b:
-                                b.append(p)
-                                p = _branch(state, b)
-                            if char == "=":
-                                subpattern.append((ASSERT, (dir, p)))
-                            else:
-                                subpattern.append((ASSERT_NOT, (dir, p)))
-                            break
-                        elif source.match("|"):
-                            b.append(p)
-                        else:
-                            raise error, "pattern not properly closed"
+                    p = _parse_sub(source, state)
+                    if char == "=":
+                        subpattern.append((ASSERT, (dir, p)))
+                    else:
+                        subpattern.append((ASSERT_NOT, (dir, p)))
+                    continue
                 else:
                     # flags
                     while FLAGS.has_key(source.next):
                         state.flags = state.flags | FLAGS[source.get()]
             if group:
                 # parse group contents
-                b = []
                 if group == 2:
                     # anonymous group
                     group = None
                 else:
                     group = state.getgroup(name)
-                while 1:
-                    p = _parse(source, state)
-                    if group is not None:
-                        p.append((INDEX, group))
-                    if source.match(")"):
-                        if b:
-                            b.append(p)
-                            p = _branch(state, b)
-                        subpattern.append((SUBPATTERN, (group, p)))
-                        break
-                    elif source.match("|"):
-                        b.append(p)
-                    else:
-                        raise error, "group not properly closed"
+                p = _parse_sub(source, state)
+                subpattern.append((SUBPATTERN, (group, p)))
+                if group is not None:
+                    p.append((INDEX, group))
             else:
                 while 1:
                     char = source.get()
@@ -555,26 +560,24 @@ def _parse(source, state):
 
     return subpattern
 
-def parse(pattern, flags=0):
+def parse(str, flags=0):
     # parse 're' pattern into list of (opcode, argument) tuples
-    source = Tokenizer(pattern)
-    state = State()
-    state.flags = flags
-    b = []
-    while 1:
-        p = _parse(source, state)
-        tail = source.get()
-        if tail == "|":
-            b.append(p)
-        elif tail == ")":
-            raise error, "unbalanced parenthesis"
-        elif tail is None:
-            if b:
-                b.append(p)
-                p = _branch(state, b)
-            break
-        else:
-            raise error, "bogus characters at end of regular expression"
+
+    source = Tokenizer(str)
+
+    pattern = Pattern()
+    pattern.flags = flags
+
+    p = _parse_sub(source, pattern, 0)
+
+    tail = source.get()
+    if tail == ")":
+        raise error, "unbalanced parenthesis"
+    elif tail:
+        raise error, "bogus characters at end of regular expression"
+
+    # p.dump()
+
     return p
 
 def parse_template(source, pattern):
@@ -656,4 +659,4 @@ def expand_template(template, match):
             if s is None:
                 raise error, "empty group"
             a(s)
-    return sep.join(p)
+    return string.join(p, sep)
index d949f2551cf8409ff8a7cdcc57ea6e7243c88463..05bcead63b2ee10bb790b658e3a68d70231a498f 100644 (file)
@@ -1,4 +1,6 @@
 test_sre
 === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
 === Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
+=== grouping error ('(a)(b)c|ab', 'ab', 0, 'found+"-"+g1+"-"+g2', 'ab-None-None') 'ab-None-b' should be 'ab-None-None'
+=== grouping error ('(a)+b|aac', 'aac', 0, 'found+"-"+g1', 'aac-None') 'aac-a' should be 'aac-None'
 === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
index e7313917fdecb33489186ea955bfc4a02fb79674..b0efc85d23ae61c84a7887f73eaf2ef57fb2dadb 100644 (file)
@@ -1,28 +1,30 @@
-/* -*- Mode: C; tab-width: 4 -*-
- *
+/*
  * Secret Labs' Regular Expression Engine
  *
  * regular expression matching engine
  *
  * partial history:
- * 99-10-24 fl created (based on existing template matcher code)
- * 99-11-13 fl added categories, branching, and more (0.2)
- * 99-11-16 fl some tweaks to compile on non-Windows platforms
- * 99-12-18 fl non-literals, generic maximizing repeat (0.3)
- * 00-02-28 fl tons of changes (not all to the better ;-) (0.4)
- * 00-03-06 fl first alpha, sort of (0.5)
- * 00-03-14 fl removed most compatibility stuff (0.6)
- * 00-05-10 fl towards third alpha (0.8.2)
- * 00-05-13 fl added experimental scanner stuff (0.8.3)
- * 00-05-27 fl final bug hunt (0.8.4)
- * 00-06-21 fl less bugs, more taste (0.8.5)
- * 00-06-25 fl major changes to better deal with nested repeats (0.9)
- * 00-06-28 fl fixed findall (0.9.1)
- * 00-06-29 fl fixed split, added more scanner features (0.9.2)
- * 00-06-30 fl added fast search optimization (0.9.3)
- * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
- * 00-07-02 fl added charset optimizations, etc (0.9.5)
- * 00-07-03 fl store code in pattern object, lookbehind, etc
+ * 99-10-24 fl  created (based on existing template matcher code)
+ * 99-11-13 fl  added categories, branching, and more (0.2)
+ * 99-11-16 fl  some tweaks to compile on non-Windows platforms
+ * 99-12-18 fl  non-literals, generic maximizing repeat (0.3)
+ * 00-02-28 fl  tons of changes (not all to the better ;-) (0.4)
+ * 00-03-06 fl  first alpha, sort of (0.5)
+ * 00-03-14 fl  removed most compatibility stuff (0.6)
+ * 00-05-10 fl  towards third alpha (0.8.2)
+ * 00-05-13 fl  added experimental scanner stuff (0.8.3)
+ * 00-05-27 fl  final bug hunt (0.8.4)
+ * 00-06-21 fl  less bugs, more taste (0.8.5)
+ * 00-06-25 fl  major changes to better deal with nested repeats (0.9)
+ * 00-06-28 fl  fixed findall (0.9.1)
+ * 00-06-29 fl  fixed split, added more scanner features (0.9.2)
+ * 00-06-30 fl  added fast search optimization (0.9.3)
+ * 00-06-30 fl  added assert (lookahead) primitives, etc (0.9.4)
+ * 00-07-02 fl  added charset optimizations, etc (0.9.5)
+ * 00-07-03 fl  store code in pattern object, lookbehind, etc
+ * 00-07-08 fl  added regs attribute
+ * 00-07-18 fl  changed branch operator to use failure stack
+ * 00-07-21 fl  reset lastindex in scanner methods (0.9.6)
  *
  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
  *
@@ -33,7 +35,7 @@
 
 #ifndef SRE_RECURSIVE
 
-char copyright[] = " SRE 0.9.5 Copyright (c) 1997-2000 by Secret Labs AB ";
+char copyright[] = " SRE 0.9.6 Copyright (c) 1997-2000 by Secret Labs AB ";
 
 #include "Python.h"
 
@@ -51,30 +53,40 @@ char copyright[] = " SRE 0.9.5 Copyright (c) 1997-2000 by Secret Labs AB ";
 #define MODULE "sre"
 
 /* defining this one enables tracing */
-#undef DEBUG
+#undef VERBOSE
 
 #if PY_VERSION_HEX >= 0x01060000
 /* defining this enables unicode support (default under 1.6a1 and later) */
 #define HAVE_UNICODE
 #endif
 
+/* -------------------------------------------------------------------- */
 /* optional features */
+
+/* enables fast searching */
 #define USE_FAST_SEARCH
 
+/* enables aggressive inlining (always on for Visual C) */
+#define USE_INLINE
+
+/* -------------------------------------------------------------------- */
+
 #if defined(_MSC_VER)
 #pragma optimize("agtw", on) /* doesn't seem to make much difference... */
 #pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
 /* fastest possible local call under MSVC */
 #define LOCAL(type) static __inline type __fastcall
-#else
+#elif defined(USE_INLINE)
 #define LOCAL(type) static inline type
+#else
+#define LOCAL(type) static type
 #endif
 
 /* error codes */
 #define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
 #define SRE_ERROR_MEMORY -9 /* out of memory */
 
-#if defined(DEBUG)
+#if defined(VERBOSE)
 #define TRACE(v) printf v
 #else
 #define TRACE(v)
@@ -150,56 +162,56 @@ static unsigned int sre_lower_unicode(unsigned int ch)
 #define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
 #define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
 #define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
-#define SRE_UNI_IS_WORD(ch) (SRE_IS_ALNUM((ch)) || (ch) == '_')
+#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
 #endif
 
 LOCAL(int)
 sre_category(SRE_CODE category, unsigned int ch)
 {
-       switch (category) {
-
-       case SRE_CATEGORY_DIGIT:
-               return SRE_IS_DIGIT(ch);
-       case SRE_CATEGORY_NOT_DIGIT:
-               return !SRE_IS_DIGIT(ch);
-       case SRE_CATEGORY_SPACE:
-               return SRE_IS_SPACE(ch);
-       case SRE_CATEGORY_NOT_SPACE:
-               return !SRE_IS_SPACE(ch);
-       case SRE_CATEGORY_WORD:
-               return SRE_IS_WORD(ch);
-       case SRE_CATEGORY_NOT_WORD:
-               return !SRE_IS_WORD(ch);
-       case SRE_CATEGORY_LINEBREAK:
-               return SRE_IS_LINEBREAK(ch);
-       case SRE_CATEGORY_NOT_LINEBREAK:
-               return !SRE_IS_LINEBREAK(ch);
-
-       case SRE_CATEGORY_LOC_WORD:
-               return SRE_LOC_IS_WORD(ch);
-       case SRE_CATEGORY_LOC_NOT_WORD:
-               return !SRE_LOC_IS_WORD(ch);
+    switch (category) {
+
+    case SRE_CATEGORY_DIGIT:
+        return SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_NOT_DIGIT:
+        return !SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_SPACE:
+        return SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_NOT_SPACE:
+        return !SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_WORD:
+        return SRE_IS_WORD(ch);
+    case SRE_CATEGORY_NOT_WORD:
+        return !SRE_IS_WORD(ch);
+    case SRE_CATEGORY_LINEBREAK:
+        return SRE_IS_LINEBREAK(ch);
+    case SRE_CATEGORY_NOT_LINEBREAK:
+        return !SRE_IS_LINEBREAK(ch);
+
+    case SRE_CATEGORY_LOC_WORD:
+        return SRE_LOC_IS_WORD(ch);
+    case SRE_CATEGORY_LOC_NOT_WORD:
+        return !SRE_LOC_IS_WORD(ch);
 
 #if defined(HAVE_UNICODE)
-       case SRE_CATEGORY_UNI_DIGIT:
-               return SRE_UNI_IS_DIGIT(ch);
-       case SRE_CATEGORY_UNI_NOT_DIGIT:
-               return !SRE_UNI_IS_DIGIT(ch);
-       case SRE_CATEGORY_UNI_SPACE:
-               return SRE_UNI_IS_SPACE(ch);
-       case SRE_CATEGORY_UNI_NOT_SPACE:
-               return !SRE_UNI_IS_SPACE(ch);
-       case SRE_CATEGORY_UNI_WORD:
-               return SRE_UNI_IS_WORD(ch);
-       case SRE_CATEGORY_UNI_NOT_WORD:
-               return !SRE_UNI_IS_WORD(ch);
-       case SRE_CATEGORY_UNI_LINEBREAK:
-               return SRE_UNI_IS_LINEBREAK(ch);
-       case SRE_CATEGORY_UNI_NOT_LINEBREAK:
-               return !SRE_UNI_IS_LINEBREAK(ch);
+    case SRE_CATEGORY_UNI_DIGIT:
+        return SRE_UNI_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_NOT_DIGIT:
+        return !SRE_UNI_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_SPACE:
+        return SRE_UNI_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_NOT_SPACE:
+        return !SRE_UNI_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_WORD:
+        return SRE_UNI_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_NOT_WORD:
+        return !SRE_UNI_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_LINEBREAK:
+        return SRE_UNI_IS_LINEBREAK(ch);
+    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
+        return !SRE_UNI_IS_LINEBREAK(ch);
 #endif
-       }
-       return 0;
+    }
+    return 0;
 }
 
 /* helpers */
@@ -207,55 +219,55 @@ sre_category(SRE_CODE category, unsigned int ch)
 LOCAL(int)
 stack_free(SRE_STATE* state)
 {
-       if (state->stack) {
-               TRACE(("release stack\n"));
-               free(state->stack);
-               state->stack = NULL;
-       }
-       state->stacksize = 0;
-       return 0;
+    if (state->stack) {
+        TRACE(("release stack\n"));
+        free(state->stack);
+        state->stack = NULL;
+    }
+    state->stacksize = 0;
+    return 0;
 }
 
 static int /* shouldn't be LOCAL */
 stack_extend(SRE_STATE* state, int lo, int hi)
 {
-       SRE_STACK* stack;
-       int stacksize;
-
-       /* grow the stack to a suitable size; we need at least lo entries,
-          at most hi entries.  if for some reason hi is lower than lo, lo
-          wins */
-
-       stacksize = state->stacksize;
-
-       if (stacksize == 0) {
-               /* create new stack */
-               stacksize = 512;
-               if (stacksize < lo)
-                       stacksize = lo;
-               else if (stacksize > hi)
-                       stacksize = hi;
-               TRACE(("allocate stack %d\n", stacksize));
-               stack = malloc(sizeof(SRE_STACK) * stacksize);
-       } else {
-               /* grow the stack (typically by a factor of two) */
-               while (stacksize < lo)
-                       stacksize = 2 * stacksize;
-               /* FIXME: <fl> could trim size if it's much larger than hi,
+    SRE_STACK* stack;
+    int stacksize;
+
+    /* grow the stack to a suitable size; we need at least lo entries,
+       at most hi entries.  if for some reason hi is lower than lo, lo
+       wins */
+
+    stacksize = state->stacksize;
+
+    if (stacksize == 0) {
+        /* create new stack */
+        stacksize = 512;
+        if (stacksize < lo)
+            stacksize = lo;
+        else if (stacksize > hi)
+            stacksize = hi;
+        TRACE(("allocate stack %d\n", stacksize));
+        stack = malloc(sizeof(SRE_STACK) * stacksize);
+    } else {
+        /* grow the stack (typically by a factor of two) */
+        while (stacksize < lo)
+            stacksize = 2 * stacksize;
+        /* FIXME: <fl> could trim size if it's much larger than hi,
            as long it's larger than lo */
-               TRACE(("grow stack to %d\n", stacksize));
-               stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
-       }
+        TRACE(("grow stack to %d\n", stacksize));
+        stack = realloc(state->stack, sizeof(SRE_STACK) * stacksize);
+    }
 
-       if (!stack) {
-               stack_free(state);
-               return SRE_ERROR_MEMORY;
-       }
+    if (!stack) {
+        stack_free(state);
+        return SRE_ERROR_MEMORY;
+    }
 
-       state->stack = stack;
-       state->stacksize = stacksize;
+    state->stack = stack;
+    state->stacksize = stacksize;
 
-       return 0;
+    return 0;
 }
 
 /* generate 8-bit version */
@@ -298,80 +310,80 @@ stack_extend(SRE_STATE* state, int lo, int hi)
 LOCAL(int)
 SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
 {
-       /* check if pointer is at given position */
+    /* check if pointer is at given position */
 
-       int this, that;
+    int this, that;
 
-       switch (at) {
+    switch (at) {
 
-       case SRE_AT_BEGINNING:
-               return ((void*) ptr == state->beginning);
+    case SRE_AT_BEGINNING:
+        return ((void*) ptr == state->beginning);
 
-       case SRE_AT_BEGINNING_LINE:
-               return ((void*) ptr == state->beginning ||
+    case SRE_AT_BEGINNING_LINE:
+        return ((void*) ptr == state->beginning ||
                 SRE_IS_LINEBREAK((int) ptr[-1]));
 
-       case SRE_AT_END:
+    case SRE_AT_END:
         return (((void*) (ptr+1) == state->end &&
                  SRE_IS_LINEBREAK((int) ptr[0])) ||
                 ((void*) ptr == state->end));
 
-       case SRE_AT_END_LINE:
-               return ((void*) ptr == state->end ||
+    case SRE_AT_END_LINE:
+        return ((void*) ptr == state->end ||
                 SRE_IS_LINEBREAK((int) ptr[0]));
 
-       case SRE_AT_BOUNDARY:
-               if (state->beginning == state->end)
-                       return 0;
-               that = ((void*) ptr > state->beginning) ?
-                       SRE_IS_WORD((int) ptr[-1]) : 0;
-               this = ((void*) ptr < state->end) ?
-                       SRE_IS_WORD((int) ptr[0]) : 0;
-               return this != that;
-
-       case SRE_AT_NON_BOUNDARY:
-               if (state->beginning == state->end)
-                       return 0;
-               that = ((void*) ptr > state->beginning) ?
-                       SRE_IS_WORD((int) ptr[-1]) : 0;
-               this = ((void*) ptr < state->end) ?
-                       SRE_IS_WORD((int) ptr[0]) : 0;
-               return this == that;
-       }
-
-       return 0;
+    case SRE_AT_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_IS_WORD((int) ptr[0]) : 0;
+        return this != that;
+
+    case SRE_AT_NON_BOUNDARY:
+        if (state->beginning == state->end)
+            return 0;
+        that = ((void*) ptr > state->beginning) ?
+            SRE_IS_WORD((int) ptr[-1]) : 0;
+        this = ((void*) ptr < state->end) ?
+            SRE_IS_WORD((int) ptr[0]) : 0;
+        return this == that;
+    }
+
+    return 0;
 }
 
 LOCAL(int)
 SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
 {
-       /* check if character is a member of the given set */
+    /* check if character is a member of the given set */
 
-       int ok = 1;
+    int ok = 1;
 
-       for (;;) {
-               switch (*set++) {
+    for (;;) {
+        switch (*set++) {
 
-               case SRE_OP_NEGATE:
-                       ok = !ok;
-                       break;
+        case SRE_OP_NEGATE:
+            ok = !ok;
+            break;
 
-               case SRE_OP_FAILURE:
-                       return !ok;
+        case SRE_OP_FAILURE:
+            return !ok;
 
-               case SRE_OP_LITERAL:
+        case SRE_OP_LITERAL:
             /* args: <literal> */
-                       if (ch == set[0])
-                               return ok;
-                       set++;
-                       break;
+            if (ch == set[0])
+                return ok;
+            set++;
+            break;
 
-               case SRE_OP_RANGE:
+        case SRE_OP_RANGE:
             /* args: <lower> <upper> */
-                       if (set[0] <= ch && ch <= set[1])
-                               return ok;
-                       set += 2;
-                       break;
+            if (set[0] <= ch && ch <= set[1])
+                return ok;
+            set += 2;
+            break;
 
         case SRE_OP_CHARSET:
             /* args: <bitmap> (16 bits per code word) */
@@ -380,39 +392,57 @@ SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
             set += 16;
             break;
 
-               case SRE_OP_CATEGORY:
+        case SRE_OP_CATEGORY:
             /* args: <category> */
-                       if (sre_category(set[0], (int) ch))
-                               return ok;
-                       set += 1;
-                       break;
+            if (sre_category(set[0], (int) ch))
+                return ok;
+            set += 1;
+            break;
 
-               default:
-                       /* internal error -- there's not much we can do about it
+        default:
+            /* internal error -- there's not much we can do about it
                here, so let's just pretend it didn't match... */
-                       return 0;
-               }
-       }
+            return 0;
+        }
+    }
 }
 
 LOCAL(int)
 SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 {
-       /* check if string matches the given pattern.  returns -1 for
-          error, 0 for failure, and 1 for success */
+    /* check if string matches the given pattern.  returns -1 for
+       error, 0 for failure, and 1 for success */
 
-       SRE_CHAR* end = state->end;
-       SRE_CHAR* ptr = state->ptr;
-       int stack;
-       int stackbase;
+    SRE_CHAR* end = state->end;
+    SRE_CHAR* ptr = state->ptr;
+    int stack;
+    int stackbase;
     int lastmark;
-       int i, count;
+    int i, count;
     SRE_STACK* sp;
 
     /* FIXME: this is a hack! */
     void* mark_copy[SRE_MARK_SIZE];
     void* mark = NULL;
 
+#define PUSH(skip_, mark_, max_)\
+    if (stack >= state->stacksize) {\
+        i = stack_extend(state, stack + 1, stackbase + max_);\
+        if (i < 0)\
+            return i;\
+    }\
+    TRACE(("%8d: stack[%d]\n", PTR(ptr), stack));\
+    sp = state->stack + (stack++);\
+    sp->ptr = ptr;\
+    sp->pattern = pattern + skip_;\
+    sp->mark = mark_;\
+    if (mark_ != 65535) {\
+        sp->mark0 = state->mark[mark_];\
+        sp->mark1 = state->mark[mark_+1];\
+        TRACE(("          mark %d %d %d\n", mark_, PTR(state->mark[mark_]),\
+               PTR(state->mark[mark_+1])));\
+    }\
+
     TRACE(("%8d: enter\n", PTR(ptr)));
 
     if (pattern[0] == SRE_OP_INFO) {
@@ -431,148 +461,148 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 
   retry:
 
-       for (;;) {
-
-               switch (*pattern++) {
-
-               case SRE_OP_FAILURE:
-                       /* immediate failure */
-                       TRACE(("%8d: failure\n", PTR(ptr)));
-                       goto failure;
-
-               case SRE_OP_SUCCESS:
-                       /* end of pattern */
-                       TRACE(("%8d: success\n", PTR(ptr)));
-                       state->ptr = ptr;
-                       goto success;
-
-               case SRE_OP_AT:
-                       /* match at given position */
-                       /* args: <at> */
-                       TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
-                       if (!SRE_AT(state, ptr, *pattern))
-                               goto failure;
-                       pattern++;
-                       break;
-
-               case SRE_OP_CATEGORY:
-                       /* match at given category */
-                       /* args: <category> */
-                       TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
+    for (;;) {
+
+        switch (*pattern++) {
+
+        case SRE_OP_FAILURE:
+            /* immediate failure */
+            TRACE(("%8d: failure\n", PTR(ptr)));
+            goto failure;
+
+        case SRE_OP_SUCCESS:
+            /* end of pattern */
+            TRACE(("%8d: success\n", PTR(ptr)));
+            state->ptr = ptr;
+            goto success;
+
+        case SRE_OP_AT:
+            /* match at given position */
+            /* args: <at> */
+            TRACE(("%8d: position %d\n", PTR(ptr), *pattern));
+            if (!SRE_AT(state, ptr, *pattern))
+                goto failure;
+            pattern++;
+            break;
+
+        case SRE_OP_CATEGORY:
+            /* match at given category */
+            /* args: <category> */
+            TRACE(("%8d: category %d [category %d]\n", PTR(ptr),
                    *ptr, *pattern));
-                       if (ptr >= end || !sre_category(pattern[0], ptr[0]))
-                               goto failure;
-                       TRACE(("%8d: category ok\n", PTR(ptr)));
-                       pattern++;
+            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
+                goto failure;
+            TRACE(("%8d: category ok\n", PTR(ptr)));
+            pattern++;
+            ptr++;
+            break;
+
+        case SRE_OP_LITERAL:
+            /* match literal string */
+            /* args: <code> */
+            TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
+            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
+                goto failure;
+            pattern++;
+            ptr++;
+            break;
+
+        case SRE_OP_NOT_LITERAL:
+            /* match anything that is not literal character */
+            /* args: <code> */
+            TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
+            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
+                goto failure;
+            pattern++;
             ptr++;
-                       break;
-
-               case SRE_OP_LITERAL:
-                       /* match literal string */
-                       /* args: <code> */
-                       TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
-                       if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
-                               goto failure;
-                       pattern++;
-                       ptr++;
-                       break;
-
-               case SRE_OP_NOT_LITERAL:
-                       /* match anything that is not literal character */
-                       /* args: <code> */
-                       TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
-                       if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
-                               goto failure;
-                       pattern++;
-                       ptr++;
-                       break;
-
-               case SRE_OP_ANY:
-                       /* match anything */
-                       TRACE(("%8d: anything\n", PTR(ptr)));
-                       if (ptr >= end)
-                               goto failure;
-                       ptr++;
-                       break;
-
-               case SRE_OP_IN:
-                       /* match set member (or non_member) */
-                       /* args: <skip> <set> */
-                       TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
-                       if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
-                               goto failure;
-                       pattern += pattern[0];
-                       ptr++;
-                       break;
-
-               case SRE_OP_GROUP:
-                       /* match backreference */
-                       TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
-                       i = pattern[0];
-                       {
-                               SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
-                               SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
-                               if (!p || !e || e < p)
-                                       goto failure;
-                               while (p < e) {
-                                       if (ptr >= end || *ptr != *p)
-                                               goto failure;
-                                       p++; ptr++;
-                               }
-                       }
-                       pattern++;
-                       break;
-
-               case SRE_OP_GROUP_IGNORE:
-                       /* match backreference */
-                       TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
-                       i = pattern[0];
-                       {
-                               SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
-                               SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
-                               if (!p || !e || e < p)
-                                       goto failure;
-                               while (p < e) {
-                                       if (ptr >= end ||
+            break;
+
+        case SRE_OP_ANY:
+            /* match anything */
+            TRACE(("%8d: anything\n", PTR(ptr)));
+            if (ptr >= end)
+                goto failure;
+            ptr++;
+            break;
+
+        case SRE_OP_IN:
+            /* match set member (or non_member) */
+            /* args: <skip> <set> */
+            TRACE(("%8d: set %c\n", PTR(ptr), *ptr));
+            if (ptr >= end || !SRE_MEMBER(pattern + 1, *ptr))
+                goto failure;
+            pattern += pattern[0];
+            ptr++;
+            break;
+
+        case SRE_OP_GROUP:
+            /* match backreference */
+            TRACE(("%8d: group %d\n", PTR(ptr), pattern[0]));
+            i = pattern[0];
+            {
+                SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
+                SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
+                if (!p || !e || e < p)
+                    goto failure;
+                while (p < e) {
+                    if (ptr >= end || *ptr != *p)
+                        goto failure;
+                    p++; ptr++;
+                }
+            }
+            pattern++;
+            break;
+
+        case SRE_OP_GROUP_IGNORE:
+            /* match backreference */
+            TRACE(("%8d: group ignore %d\n", PTR(ptr), pattern[0]));
+            i = pattern[0];
+            {
+                SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
+                SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
+                if (!p || !e || e < p)
+                    goto failure;
+                while (p < e) {
+                    if (ptr >= end ||
                         state->lower(*ptr) != state->lower(*p))
-                                               goto failure;
-                                       p++; ptr++;
-                               }
-                       }
-                       pattern++;
-                       break;
-
-               case SRE_OP_LITERAL_IGNORE:
-                       TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
-                       if (ptr >= end ||
+                        goto failure;
+                    p++; ptr++;
+                }
+            }
+            pattern++;
+            break;
+
+        case SRE_OP_LITERAL_IGNORE:
+            TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
+            if (ptr >= end ||
                 state->lower(*ptr) != state->lower(*pattern))
-                               goto failure;
-                       pattern++;
-                       ptr++;
-                       break;
-
-               case SRE_OP_NOT_LITERAL_IGNORE:
-                       TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
-                       if (ptr >= end ||
+                goto failure;
+            pattern++;
+            ptr++;
+            break;
+
+        case SRE_OP_NOT_LITERAL_IGNORE:
+            TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
+            if (ptr >= end ||
                 state->lower(*ptr) == state->lower(*pattern))
-                               goto failure;
-                       pattern++;
-                       ptr++;
-                       break;
-
-               case SRE_OP_IN_IGNORE:
-                       TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
-                       if (ptr >= end
-                               || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
-                               goto failure;
-                       pattern += pattern[0];
-                       ptr++;
-                       break;
-
-               case SRE_OP_MARK:
-                       /* set mark */
-                       /* args: <mark> */
-                       TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
+                goto failure;
+            pattern++;
+            ptr++;
+            break;
+
+        case SRE_OP_IN_IGNORE:
+            TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
+            if (ptr >= end
+                || !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
+                goto failure;
+            pattern += pattern[0];
+            ptr++;
+            break;
+
+        case SRE_OP_MARK:
+            /* set mark */
+            /* args: <mark> */
+            TRACE(("%8d: set mark %d\n", PTR(ptr), pattern[0]));
             if (state->lastmark < pattern[0]+1)
                 state->lastmark = pattern[0]+1;
             if (!mark) {
@@ -580,216 +610,229 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
                 memcpy(mark, state->mark, state->lastmark*sizeof(void*));
             }
             state->mark[pattern[0]] = ptr;
-                       pattern++;
-                       break;
+            pattern++;
+            break;
 
-               case SRE_OP_INDEX:
-                       /* set index */
-                       /* args: <index> */
-                       TRACE(("%8d: set index %d\n", PTR(ptr), pattern[0]));
+        case SRE_OP_INDEX:
+            /* set index */
+            /* args: <index> */
+            TRACE(("%8d: set index %d\n", PTR(ptr), pattern[0]));
             state->lastindex = pattern[0];
-                       pattern++;
-                       break;
-
-               case SRE_OP_JUMP:
-               case SRE_OP_INFO:
-                       /* jump forward */
-                       /* args: <skip> */
-                       TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
-                       pattern += pattern[0];
-                       break;
-
-               case SRE_OP_ASSERT:
-                       /* assert subpattern */
-                       /* args: <skip> <back> <pattern> */
-                       TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
-                       state->ptr = ptr - pattern[1];
+            pattern++;
+            break;
+
+        case SRE_OP_JUMP:
+        case SRE_OP_INFO:
+            /* jump forward */
+            /* args: <skip> */
+            TRACE(("%8d: jump +%d\n", PTR(ptr), pattern[0]));
+            pattern += pattern[0];
+            break;
+
+        case SRE_OP_ASSERT:
+            /* assert subpattern */
+            /* args: <skip> <back> <pattern> */
+            TRACE(("%8d: assert subpattern %d\n", PTR(ptr), pattern[1]));
+            state->ptr = ptr - pattern[1];
             if (state->ptr < state->beginning)
                 goto failure;
-                       i = SRE_MATCH(state, pattern + 2);
+            i = SRE_MATCH(state, pattern + 2);
             if (i < 0)
                 return i;
             if (!i)
-                               goto failure;
-                       pattern += pattern[0];
-                       break;
-
-               case SRE_OP_ASSERT_NOT:
-                       /* assert not subpattern */
-                       /* args: <skip> <pattern> */
-                       TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
-                       state->ptr = ptr - pattern[1];
+                goto failure;
+            pattern += pattern[0];
+            break;
+
+        case SRE_OP_ASSERT_NOT:
+            /* assert not subpattern */
+            /* args: <skip> <pattern> */
+            TRACE(("%8d: assert not subpattern %d\n", PTR(ptr), pattern[1]));
+            state->ptr = ptr - pattern[1];
             if (state->ptr < state->beginning)
                 goto failure;
-                       i = SRE_MATCH(state, pattern + 2);
+            i = SRE_MATCH(state, pattern + 2);
             if (i < 0)
                 return i;
             if (i)
-                               goto failure;
-                       pattern += pattern[0];
-                       break;
+                goto failure;
+            pattern += pattern[0];
+            break;
+
+        case SRE_OP_BRANCH:
+            /* try an alternate branch */
+            /* format: <branch> <0=skip> <1=mark> <tail...> */
+            TRACE(("%8d: branch\n", PTR(ptr)));
+            if (pattern[2] != SRE_OP_LITERAL ||
+                (ptr < end && (SRE_CODE) ptr[0] == pattern[3])) {
+                /* worth trying */
+                PUSH(pattern[0], pattern[3], 1);
+                pattern += 2;
+            } else
+                pattern += pattern[0];
+            break;
 
 #if 0
-               case SRE_OP_MAX_REPEAT_ONE:
-                       /* match repeated sequence (maximizing regexp) */
+        case SRE_OP_MAX_REPEAT_ONE:
+            /* match repeated sequence (maximizing regexp) */
 
             /* this operator only works if the repeated item is
-                          exactly one character wide, and we're not already
-                          collecting backtracking points.  for other cases,
+               exactly one character wide, and we're not already
+               collecting backtracking points.  for other cases,
                use the MAX_REPEAT operator instead */
 
-                       /* args: <skip> <min> <max> <step> */
-                       TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
-                                  pattern[1], pattern[2]));
-
-                       count = 0;
-
-                       if (pattern[3] == SRE_OP_ANY) {
-                               /* repeated wildcard.  skip to the end of the target
-                                  string, and backtrack from there */
-                               /* FIXME: must look for line endings */
-                               if (ptr + pattern[1] > end)
-                                       goto failure; /* cannot match */
-                               count = pattern[2];
-                               if (count > end - ptr)
-                                       count = end - ptr;
-                               ptr += count;
-
-                       } else if (pattern[3] == SRE_OP_LITERAL) {
-                               /* repeated literal */
-                               SRE_CODE chr = pattern[4];
-                               while (count < (int) pattern[2]) {
-                                       if (ptr >= end || (SRE_CODE) ptr[0] != chr)
-                                               break;
-                                       ptr++;
-                                       count++;
-                               }
-
-                       } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
-                               /* repeated literal */
-                               SRE_CODE chr = pattern[4];
-                               while (count < (int) pattern[2]) {
-                                       if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
-                                               break;
-                                       ptr++;
-                                       count++;
-                               }
-
-                       } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
-                               /* repeated non-literal */
-                               SRE_CODE chr = pattern[4];
-                               while (count < (int) pattern[2]) {
-                                       if (ptr >= end || (SRE_CODE) ptr[0] == chr)
-                                               break;
-                                       ptr++;
-                                       count++;
-                               }
-
-                       } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
-                               /* repeated non-literal */
-                               SRE_CODE chr = pattern[4];
-                               while (count < (int) pattern[2]) {
-                                       if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
-                                               break;
-                                       ptr++;
-                                       count++;
-                               }
-
-                       } else if (pattern[3] == SRE_OP_IN) {
-                               /* repeated set */
-                               while (count < (int) pattern[2]) {
-                                       if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
-                                               break;
-                                       ptr++;
-                                       count++;
-                               }
-
-                       } else {
-                               /* repeated single character pattern */
-                               state->ptr = ptr;
-                               while (count < (int) pattern[2]) {
-                                       i = SRE_MATCH(state, pattern + 3);
-                                       if (i < 0)
-                                               return i;
-                                       if (!i)
-                                               break;
-                                       count++;
-                               }
-                               state->ptr = ptr;
-                               ptr += count;
-                       }
-
-                       /* when we arrive here, count contains the number of
-                          matches, and ptr points to the tail of the target
-                          string.      check if the rest of the pattern matches, and
-                          backtrack if not. */
-
-                       TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
-
-                       if (count < (int) pattern[1])
-                               goto failure;
-
-                       if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
-                               /* tail is empty.  we're finished */
-                               TRACE(("%8d: tail is empty\n", PTR(ptr)));
-                               state->ptr = ptr;
-                               goto success;
-
-                       } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
-                               /* tail starts with a literal. skip positions where
-                                  the rest of the pattern cannot possibly match */
-                               SRE_CODE chr = pattern[pattern[0]+1];
-                               TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
-                               for (;;) {
-                                       TRACE(("%8d: scan for tail match\n", PTR(ptr)));
-                                       while (count >= (int) pattern[1] &&
-                                                  (ptr >= end || *ptr != chr)) {
-                                               ptr--;
-                                               count--;
-                                       }
-                                       TRACE(("%8d: check tail\n", PTR(ptr)));
-                                       if (count < (int) pattern[1])
-                                               break;
-                                       state->ptr = ptr;
-                                       i = SRE_MATCH(state, pattern + pattern[0]);
-                                       if (i > 0) {
-                                               TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
-                                               goto success;
-                                       }
-                                       TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
-                                       ptr--;
-                                       count--;
-                               }
-
-                       } else {
-                               /* general case */
-                               TRACE(("%8d: tail is pattern\n", PTR(ptr)));
-                               while (count >= (int) pattern[1]) {
-                                       state->ptr = ptr;
-                                       i = SRE_MATCH(state, pattern + pattern[0]);
+            /* args: <skip> <min> <max> <step> */
+            TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
+                   pattern[1], pattern[2]));
+
+            count = 0;
+
+            if (pattern[3] == SRE_OP_ANY) {
+                /* repeated wildcard.  skip to the end of the target
+                   string, and backtrack from there */
+                /* FIXME: must look for line endings */
+                if (ptr + pattern[1] > end)
+                    goto failure; /* cannot match */
+                count = pattern[2];
+                if (count > end - ptr)
+                    count = end - ptr;
+                ptr += count;
+
+            } else if (pattern[3] == SRE_OP_LITERAL) {
+                /* repeated literal */
+                SRE_CODE chr = pattern[4];
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || (SRE_CODE) ptr[0] != chr)
+                        break;
+                    ptr++;
+                    count++;
+                }
+
+            } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
+                /* repeated literal */
+                SRE_CODE chr = pattern[4];
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
+                        break;
+                    ptr++;
+                    count++;
+                }
+
+            } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
+                /* repeated non-literal */
+                SRE_CODE chr = pattern[4];
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || (SRE_CODE) ptr[0] == chr)
+                        break;
+                    ptr++;
+                    count++;
+                }
+
+            } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
+                /* repeated non-literal */
+                SRE_CODE chr = pattern[4];
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
+                        break;
+                    ptr++;
+                    count++;
+                }
+
+            } else if (pattern[3] == SRE_OP_IN) {
+                /* repeated set */
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
+                        break;
+                    ptr++;
+                    count++;
+                }
+
+            } else {
+                /* repeated single character pattern */
+                state->ptr = ptr;
+                while (count < (int) pattern[2]) {
+                    i = SRE_MATCH(state, pattern + 3);
+                    if (i < 0)
+                        return i;
+                    if (!i)
+                        break;
+                    count++;
+                }
+                state->ptr = ptr;
+                ptr += count;
+            }
+
+            /* when we arrive here, count contains the number of
+               matches, and ptr points to the tail of the target
+               string.  check if the rest of the pattern matches, and
+               backtrack if not. */
+
+            TRACE(("%8d: repeat %d found\n", PTR(ptr), count));
+
+            if (count < (int) pattern[1])
+                goto failure;
+
+            if (pattern[pattern[0]] == SRE_OP_SUCCESS) {
+                /* tail is empty.  we're finished */
+                TRACE(("%8d: tail is empty\n", PTR(ptr)));
+                state->ptr = ptr;
+                goto success;
+
+            } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
+                /* tail starts with a literal. skip positions where
+                   the rest of the pattern cannot possibly match */
+                SRE_CODE chr = pattern[pattern[0]+1];
+                TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
+                for (;;) {
+                    TRACE(("%8d: scan for tail match\n", PTR(ptr)));
+                    while (count >= (int) pattern[1] &&
+                           (ptr >= end || *ptr != chr)) {
+                        ptr--;
+                        count--;
+                    }
+                    TRACE(("%8d: check tail\n", PTR(ptr)));
+                    if (count < (int) pattern[1])
+                        break;
+                    state->ptr = ptr;
+                    i = SRE_MATCH(state, pattern + pattern[0]);
+                    if (i > 0) {
+                        TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
+                        goto success;
+                    }
+                    TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
+                    ptr--;
+                    count--;
+                }
+
+            } else {
+                /* general case */
+                TRACE(("%8d: tail is pattern\n", PTR(ptr)));
+                while (count >= (int) pattern[1]) {
+                    state->ptr = ptr;
+                    i = SRE_MATCH(state, pattern + pattern[0]);
                     if (i < 0)
                         return i;
-                                       if (i) {
-                                               TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
-                                               goto success;
-                                       }
-                                       TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
-                                       ptr--;
-                                       count--;
-                               }
-                       }
-                       goto failure;
+                    if (i) {
+                        TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
+                        goto success;
+                    }
+                    TRACE(("%8d: BACKTRACK\n", PTR(ptr)));
+                    ptr--;
+                    count--;
+                }
+            }
+            goto failure;
 #endif
 
-               case SRE_OP_MAX_REPEAT:
-                       /* match repeated sequence (maximizing regexp) */
+        case SRE_OP_MAX_REPEAT:
+            /* match repeated sequence (maximizing regexp) */
             /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
 
-                       TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
-                                  pattern[1], pattern[2]));
+            TRACE(("%8d: max repeat (%d %d)\n", PTR(ptr),
+                   pattern[1], pattern[2]));
 
-                       count = 0;
-                       state->ptr = ptr;
+            count = 0;
+            state->ptr = ptr;
 
             /* match minimum number of items */
             while (count < (int) pattern[1]) {
@@ -810,144 +853,106 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 
             TRACE(("%8d: found %d leading items\n", PTR(ptr), count));
 
-                       if (count < (int) pattern[1])
-                               goto failure;
+            if (count < (int) pattern[1])
+                goto failure;
 
             /* match maximum number of items, pushing alternate end
                points to the stack */
 
             while (pattern[2] == 65535 || count < (int) pattern[2]) {
-                               /* this position was valid; add it to the retry
+                /* this position is valid; add it to the retry
                    stack */
-                               if (stack >= state->stacksize) {
-                                       i = stack_extend(state, stack + 1,
-                                      stackbase + pattern[2]);
-                                       if (i < 0)
-                                               return i; /* out of memory */
-                               }
-                TRACE(("%8d: stack[%d]\n", PTR(ptr), stack));
-                TRACE(("          ptr %d mark %d %d %d\n",
-                       PTR(ptr), pattern[3], PTR(mark0), PTR(mark1)));
-                sp = state->stack + stack;
-                               sp->ptr = ptr;
-                               sp->pattern = pattern + pattern[0];
-                sp->mark = pattern[3];
-                if (pattern[3] != 65535) {
-                    sp->mark0 = state->mark[pattern[3]];
-                    sp->mark1 = state->mark[pattern[3]+1];
-                }
-                stack++;
-                               state->stackbase = stack;
-                               i = SRE_MATCH(state, pattern + 4);
-                               state->stackbase = stackbase;
+                PUSH(pattern[0], pattern[3], pattern[2]);
+                /* match more stuff */
+                state->stackbase = stack;
+                i = SRE_MATCH(state, pattern + 4);
+                state->stackbase = stackbase;
                 if (i < 0)
                     return i;
-                               if (!i)
-                                       break;
-                               if (state->ptr == ptr) {
-                                       count = (int) pattern[2];
+                if (!i)
+                    break;
+                if (state->ptr == ptr) {
+                    count = (int) pattern[2];
                     break;
-                               }
-                               /* move forward */
-                               ptr = state->ptr;
-                               count++;
-                       }
+                }
+                /* move forward */
+                ptr = state->ptr;
+                count++;
+            }
 
-                       /* when we get here, count is the number of successful
-                          matches, and ptr points to the tail. */
+            /* when we get here, count is the number of successful
+               matches, and ptr points to the tail. */
 
             TRACE(("%8d: skip +%d\n", PTR(ptr), pattern[0]));
 
             pattern += pattern[0];
             break;
 
-               case SRE_OP_MIN_REPEAT:
-                       /* match repeated sequence (minimizing regexp) */
+        case SRE_OP_MIN_REPEAT:
+            /* match repeated sequence (minimizing regexp) */
             /* args: <skip> <1=min> <2=max> <3=save> <4=item> */
 
-                       TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
-                                  pattern[1], pattern[2]));
-                       count = 0;
-                       state->ptr = ptr;
-                       /* match minimum number of items */
-                       while (count < (int) pattern[1]) {
-                               i = SRE_MATCH(state, pattern + 4);
-                               if (i < 0)
+            TRACE(("%8d: min repeat %d %d\n", PTR(ptr),
+                   pattern[1], pattern[2]));
+            count = 0;
+            state->ptr = ptr;
+            /* match minimum number of items */
+            while (count < (int) pattern[1]) {
+                i = SRE_MATCH(state, pattern + 4);
+                if (i < 0)
                     return i;
                 if (!i)
-                                       goto failure;
-                               count++;
-                       }
-                       /* move forward until the tail matches. */
-                       while (count <= (int) pattern[2]) {
-                               ptr = state->ptr;
-                               i = SRE_MATCH(state, pattern + pattern[0]);
-                               if (i > 0) {
-                                       TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
-                                       goto success;
-                               }
-                               state->ptr = ptr; /* backtrack */
-                               i = SRE_MATCH(state, pattern + 4);
+                    goto failure;
+                count++;
+            }
+            /* move forward until the tail matches. */
+            while (count <= (int) pattern[2]) {
+                ptr = state->ptr;
+                i = SRE_MATCH(state, pattern + pattern[0]);
+                if (i > 0) {
+                    TRACE(("%8d: repeat %d picked\n", PTR(ptr), count));
+                    goto success;
+                }
+                state->ptr = ptr; /* backtrack */
+                i = SRE_MATCH(state, pattern + 4);
                 if (i < 0)
                     return i;
-                               if (!i)
-                                       goto failure;
-                               count++;
-                       }
-                       goto failure;
-
-               case SRE_OP_BRANCH:
-                       /* match one of several subpatterns */
-                       /* format: <branch> <size> <head> ... <null> <tail> */
-                       TRACE(("%8d: branch\n", PTR(ptr)));
-                       while (*pattern) {
-                               if (pattern[1] != SRE_OP_LITERAL ||
-                                       (ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
-                                       TRACE(("%8d: branch check\n", PTR(ptr)));
-                                       state->ptr = ptr;
-                                       i = SRE_MATCH(state, pattern + 1);
-                    if (i < 0)
-                        return i;
-                                       if (i) {
-                                               TRACE(("%8d: branch succeeded\n", PTR(ptr)));
-                                               goto success;
-                                       }
-                               }
-                               pattern += *pattern;
-                       }
-                       TRACE(("%8d: branch failed\n", PTR(ptr)));
-                       goto failure;
-
-               case SRE_OP_REPEAT:
-                       /* TEMPLATE: match repeated sequence (no backtracking) */
-                       /* args: <skip> <min> <max> */
-                       TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
-                       count = 0;
-                       state->ptr = ptr;
-                       while (count < (int) pattern[2]) {
-                               i = SRE_MATCH(state, pattern + 3);
+                if (!i)
+                    goto failure;
+                count++;
+            }
+            goto failure;
+
+        case SRE_OP_REPEAT:
+            /* TEMPLATE: match repeated sequence (no backtracking) */
+            /* args: <skip> <min> <max> */
+            TRACE(("%8d: repeat %d %d\n", PTR(ptr), pattern[1], pattern[2]));
+            count = 0;
+            state->ptr = ptr;
+            while (count < (int) pattern[2]) {
+                i = SRE_MATCH(state, pattern + 3);
                 if (i < 0)
                     return i;
-                               if (!i)
-                                       break;
-                               if (state->ptr == ptr) {
-                                       count = (int) pattern[2];
+                if (!i)
                     break;
-                               }
-                               count++;
-                       }
-                       if (count <= (int) pattern[1])
-                               goto failure;
-                       TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
-                       pattern += pattern[0];
-                       ptr = state->ptr;
-                       break;
+                if (state->ptr == ptr) {
+                    count = (int) pattern[2];
+                    break;
+                }
+                count++;
+            }
+            if (count <= (int) pattern[1])
+                goto failure;
+            TRACE(("%8d: repeat %d matches\n", PTR(ptr), count));
+            pattern += pattern[0];
+            ptr = state->ptr;
+            break;
 
         default:
-                       TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
-                       return SRE_ERROR_ILLEGAL;
-               }
-       }
+            TRACE(("%8d: unknown opcode %d\n", PTR(ptr), pattern[-1]));
+            return SRE_ERROR_ILLEGAL;
+        }
+    }
 
   failure:
     TRACE(("%8d: leave (failure)\n", PTR(ptr)));
@@ -978,9 +983,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
 LOCAL(int)
 SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
 {
-       SRE_CHAR* ptr = state->start;
-       SRE_CHAR* end = state->end;
-       int status = 0;
+    SRE_CHAR* ptr = state->start;
+    SRE_CHAR* end = state->end;
+    int status = 0;
     int prefix_len = 0;
     SRE_CODE* prefix = NULL;
     SRE_CODE* charset = NULL;
@@ -1051,46 +1056,46 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
 #endif
 
     if (pattern[0] == SRE_OP_LITERAL) {
-               /* pattern starts with a literal character.  this is used
+        /* pattern starts with a literal character.  this is used
            for short prefixes, and if fast search is disabled */
-               SRE_CODE chr = pattern[1];
-               for (;;) {
-                       while (ptr < end && (SRE_CODE) ptr[0] != chr)
-                               ptr++;
-                       if (ptr == end)
-                               return 0;
-                       TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
-                       state->start = ptr;
-                       state->ptr = ++ptr;
-                       status = SRE_MATCH(state, pattern + 2);
-                       if (status != 0)
-                               break;
-               }
+        SRE_CODE chr = pattern[1];
+        for (;;) {
+            while (ptr < end && (SRE_CODE) ptr[0] != chr)
+                ptr++;
+            if (ptr == end)
+                return 0;
+            TRACE(("%8d: === SEARCH === literal\n", PTR(ptr)));
+            state->start = ptr;
+            state->ptr = ++ptr;
+            status = SRE_MATCH(state, pattern + 2);
+            if (status != 0)
+                break;
+        }
     } else if (charset) {
-               /* pattern starts with a character from a known set */
-               for (;;) {
-                       while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
-                               ptr++;
-                       if (ptr == end)
-                               return 0;
-                       TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
-                       state->start = ptr;
-                       state->ptr = ptr;
-                       status = SRE_MATCH(state, pattern);
-                       if (status != 0)
-                               break;
+        /* pattern starts with a character from a known set */
+        for (;;) {
+            while (ptr < end && !SRE_MEMBER(charset, ptr[0]))
+                ptr++;
+            if (ptr == end)
+                return 0;
+            TRACE(("%8d: === SEARCH === charset\n", PTR(ptr)));
+            state->start = ptr;
+            state->ptr = ptr;
+            status = SRE_MATCH(state, pattern);
+            if (status != 0)
+                break;
         }
-       } else
-               /* general case */
-               while (ptr <= end) {
-                       TRACE(("%8d: === SEARCH ===\n", PTR(ptr))); 
-                       state->start = state->ptr = ptr++;
-                       status = SRE_MATCH(state, pattern);
-                       if (status != 0)
-                               break;
-               }
-
-       return status;
+    } else
+        /* general case */
+        while (ptr <= end) {
+            TRACE(("%8d: === SEARCH ===\n", PTR(ptr))); 
+            state->start = state->ptr = ptr++;
+            status = SRE_MATCH(state, pattern);
+            if (status != 0)
+                break;
+        }
+
+    return status;
 }
     
 
@@ -1108,70 +1113,74 @@ staticforward PyTypeObject Scanner_Type;
 static PyObject *
 _compile(PyObject* self_, PyObject* args)
 {
-       /* "compile" pattern descriptor to pattern object */
+    /* "compile" pattern descriptor to pattern object */
 
-       PatternObject* self;
+    PatternObject* self;
     int i, n;
 
-       PyObject* pattern;
+    PyObject* pattern;
     int flags = 0;
-       PyObject* code;
-       int groups = 0;
-       PyObject* groupindex = NULL;
+    PyObject* code;
+    int groups = 0;
+    PyObject* groupindex = NULL;
     PyObject* indexgroup = NULL;
-       if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
+    if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
                           &groups, &groupindex, &indexgroup))
-               return NULL;
+        return NULL;
 
-       code = PySequence_Fast(code, "code argument must be a sequence");
-       if (!code)
-               return NULL;
+    code = PySequence_Fast(code, "code argument must be a sequence");
+    if (!code)
+        return NULL;
 
+#if PY_VERSION_HEX >= 0x01060000
     n = PySequence_Size(code);
+#else
+    n = PySequence_Length(code);
+#endif
 
-       self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
-       if (!self) {
+    self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, 100*n);
+    if (!self) {
         Py_DECREF(code);
-               return NULL;
+        return NULL;
     }
 
-       for (i = 0; i < n; i++) {
-               PyObject *o = PySequence_Fast_GET_ITEM(code, i);
+    for (i = 0; i < n; i++) {
+        PyObject *o = PySequence_Fast_GET_ITEM(code, i);
         self->code[i] = (SRE_CODE) PyInt_AsLong(o);
-       }
+    }
 
     Py_DECREF(code);
 
     if (PyErr_Occurred())
         return NULL;
 
-       Py_INCREF(pattern);
-       self->pattern = pattern;
+    Py_INCREF(pattern);
+    self->pattern = pattern;
 
     self->flags = flags;
 
-       self->groups = groups;
+    self->groups = groups;
 
-       Py_XINCREF(groupindex);
-       self->groupindex = groupindex;
+    Py_XINCREF(groupindex);
+    self->groupindex = groupindex;
 
-       Py_XINCREF(indexgroup);
-       self->indexgroup = indexgroup;
+    Py_XINCREF(indexgroup);
+    self->indexgroup = indexgroup;
 
-       return (PyObject*) self;
+    return (PyObject*) self;
 }
 
 static PyObject *
 sre_codesize(PyObject* self, PyObject* args)
 {
-       return Py_BuildValue("i", sizeof(SRE_CODE));
+    return Py_BuildValue("i", sizeof(SRE_CODE));
 }
 
 static PyObject *
 sre_getlower(PyObject* self, PyObject* args)
 {
     int character, flags;
-       if (!PyArg_ParseTuple(args, "ii", &character, &flags))
+    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
         return NULL;
     if (flags & SRE_FLAG_LOCALE)
         return Py_BuildValue("i", sre_lower_locale(character));
@@ -1183,72 +1192,72 @@ sre_getlower(PyObject* self, PyObject* args)
 }
 
 LOCAL(PyObject*)
-state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
+state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
+           int start, int end)
 {
-       /* prepare state object */
-
-       PyBufferProcs *buffer;
-       int i, count;
-       void* ptr;
-
-       PyObject* string;
-       int start = 0;
-       int end = INT_MAX;
-       if (!PyArg_ParseTuple(args, "O|ii", &string, &start, &end))
-               return NULL;
-
-       /* get pointer to string buffer */
-       buffer = string->ob_type->tp_as_buffer;
-       if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
-               buffer->bf_getsegcount(string, NULL) != 1) {
-               PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
-               return NULL;
-       }
-
-       /* determine buffer size */
-       count = buffer->bf_getreadbuffer(string, 0, &ptr);
-       if (count < 0) {
-               /* sanity check */
-               PyErr_SetString(PyExc_TypeError, "buffer has negative size");
-               return NULL;
-       }
-
-       /* determine character size */
+    /* prepare state object */
+
+    PyBufferProcs *buffer;
+    int i, count;
+    void* ptr;
+
+    /* get pointer to string buffer */
+    buffer = string->ob_type->tp_as_buffer;
+    if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
+        buffer->bf_getsegcount(string, NULL) != 1) {
+        PyErr_SetString(PyExc_TypeError, "expected read-only buffer");
+        return NULL;
+    }
+
+    /* determine buffer size */
+    count = buffer->bf_getreadbuffer(string, 0, &ptr);
+    if (count < 0) {
+        /* sanity check */
+        PyErr_SetString(PyExc_TypeError, "buffer has negative size");
+        return NULL;
+    }
+
+    /* determine character size */
 #if defined(HAVE_UNICODE)
-       state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
+    state->charsize = (PyUnicode_Check(string) ? sizeof(Py_UNICODE) : 1);
 #else
-       state->charsize = 1;
+    state->charsize = 1;
 #endif
 
-       count /= state->charsize;
+    count /= state->charsize;
+
+    /* adjust boundaries */
+    if (start < 0)
+        start = 0;
+    else if (start > count)
+        start = count;
 
-       /* adjust boundaries */
-       if (start < 0)
-               start = 0;
-       else if (start > count)
-               start = count;
+    if (end < 0)
+        end = 0;
+    else if (end > count)
+        end = count;
 
-       if (end < 0)
-               end = 0;
-       else if (end > count)
-               end = count;
+    state->beginning = ptr;
 
-       state->beginning = ptr;
+    state->start = (void*) ((char*) ptr + start * state->charsize);
+    state->end = (void*) ((char*) ptr + end * state->charsize);
 
-       state->start = (void*) ((char*) ptr + start * state->charsize);
-       state->end = (void*) ((char*) ptr + end * state->charsize);
+    Py_INCREF(string);
+    state->string = string;
+    state->pos = start;
+    state->endpos = end;
 
     state->lastmark = 0;
 
-       /* FIXME: dynamic! */
-       for (i = 0; i < SRE_MARK_SIZE; i++)
-               state->mark[i] = NULL;
+    /* FIXME: dynamic! */
+    for (i = 0; i < SRE_MARK_SIZE; i++)
+        state->mark[i] = NULL;
 
     state->lastindex = -1;
 
-       state->stack = NULL;
-       state->stackbase = 0;
-       state->stacksize = 0;
+    state->stack = NULL;
+    state->stackbase = 0;
+    state->stacksize = 0;
 
     if (pattern->flags & SRE_FLAG_LOCALE)
         state->lower = sre_lower_locale;
@@ -1259,13 +1268,14 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* args)
     else
         state->lower = sre_lower;
 
-       return string;
+    return string;
 }
 
 LOCAL(void)
 state_fini(SRE_STATE* state)
 {
-       stack_free(state);
+    Py_XDECREF(state->string);
+    stack_free(state);
 }
 
 LOCAL(PyObject*)
@@ -1273,97 +1283,102 @@ state_getslice(SRE_STATE* state, int index, PyObject* string)
 {
     index = (index - 1) * 2;
 
-       if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
-               Py_INCREF(Py_None);
-               return Py_None;
-       }
+    if (string == Py_None || !state->mark[index] || !state->mark[index+1]) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
 
-       return PySequence_GetSlice(
-               string,
+    return PySequence_GetSlice(
+        string,
         ((char*)state->mark[index] - (char*)state->beginning) /
         state->charsize,
         ((char*)state->mark[index+1] - (char*)state->beginning) /
         state->charsize
-               );
+        );
 }
 
 static PyObject*
-pattern_new_match(PatternObject* pattern, SRE_STATE* state,
-                   PyObject* string, int status)
+pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
 {
-       /* create match object (from state object) */
+    /* create match object (from state object) */
 
-       MatchObject* match;
-       int i, j;
+    MatchObject* match;
+    int i, j;
     char* base;
     int n;
 
-       if (status > 0) {
+    if (status > 0) {
 
-               /* create match object (with room for extra group marks) */
-               match = PyObject_NEW_VAR(MatchObject, &Match_Type,
+        /* create match object (with room for extra group marks) */
+        match = PyObject_NEW_VAR(MatchObject, &Match_Type,
                                  2*(pattern->groups+1));
-               if (!match)
-                       return NULL;
+        if (!match)
+            return NULL;
+
+        Py_INCREF(pattern);
+        match->pattern = pattern;
 
-               Py_INCREF(pattern);
-               match->pattern = pattern;
+        Py_INCREF(state->string);
+        match->string = state->string;
 
-               Py_INCREF(string);
-               match->string = string;
+        match->regs = NULL;
+        match->groups = pattern->groups+1;
 
-               match->groups = pattern->groups+1;
+        /* fill in group slices */
 
         base = (char*) state->beginning;
         n = state->charsize;
 
-               /* group zero */
-               match->mark[0] = ((char*) state->start - base) / n;
-               match->mark[1] = ((char*) state->ptr - base) / n;
+        match->mark[0] = ((char*) state->start - base) / n;
+        match->mark[1] = ((char*) state->ptr - base) / n;
 
-               /* fill in the rest of the groups */
-               for (i = j = 0; i < pattern->groups; i++, j+=2)
-                       if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
-                               match->mark[j+2] = ((char*) state->mark[j] - base) / n;
-                               match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
-                       } else
-                               match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
+        for (i = j = 0; i < pattern->groups; i++, j+=2)
+            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
+                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
+                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
+            } else
+                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
 
-        match->lastindex = state->lastindex;
+        match->pos = state->pos;
+        match->endpos = state->endpos;
 
-        match->pos = ((char*) state->start - base) / n;
-        match->endpos = ((char*) state->end - base) / n;
+        match->lastindex = state->lastindex;
 
-               return (PyObject*) match;
+        return (PyObject*) match;
 
-       } else if (status < 0) {
+    } else if (status < 0) {
 
-               /* internal error */
-               PyErr_SetString(
-                       PyExc_RuntimeError, "internal error in regular expression engine"
-                       );
-               return NULL;
+        /* internal error */
+        PyErr_SetString(
+            PyExc_RuntimeError, "internal error in regular expression engine"
+            );
+        return NULL;
 
-       }
+    }
 
-       Py_INCREF(Py_None);
-       return Py_None;
+    Py_INCREF(Py_None);
+    return Py_None;
 }
 
 static PyObject*
 pattern_scanner(PatternObject* pattern, PyObject* args)
 {
-       /* create search state object */
+    /* create search state object */
+
+    ScannerObject* self;
 
-       ScannerObject* self;
     PyObject* string;
+    int start = 0;
+    int end = INT_MAX;
+    if (!PyArg_ParseTuple(args, "O|ii:scanner", &string, &start, &end))
+        return NULL;
 
-    /* create match object (with room for extra group marks) */
+    /* create scanner object */
     self = PyObject_NEW(ScannerObject, &Scanner_Type);
     if (!self)
         return NULL;
 
-    string = state_init(&self->state, pattern, args);
+    string = state_init(&self->state, pattern, string, start, end);
     if (!string) {
         PyObject_Del(self);
         return NULL;
@@ -1372,68 +1387,75 @@ pattern_scanner(PatternObject* pattern, PyObject* args)
     Py_INCREF(pattern);
     self->pattern = (PyObject*) pattern;
 
-    Py_INCREF(string);
-    self->string = string;
-
-       return (PyObject*) self;
+    return (PyObject*) self;
 }
 
 static void
 pattern_dealloc(PatternObject* self)
 {
-       Py_XDECREF(self->pattern);
-       Py_XDECREF(self->groupindex);
-       PyObject_DEL(self);
+    Py_XDECREF(self->pattern);
+    Py_XDECREF(self->groupindex);
+    PyObject_DEL(self);
 }
 
 static PyObject*
 pattern_match(PatternObject* self, PyObject* args)
 {
-       SRE_STATE state;
-       PyObject* string;
-       int status;
+    SRE_STATE state;
+    int status;
+
+    PyObject* string;
+    int start = 0;
+    int end = INT_MAX;
+    if (!PyArg_ParseTuple(args, "O|ii:match", &string, &start, &end))
+        return NULL;
 
-       string = state_init(&state, self, args);
-       if (!string)
-               return NULL;
+    string = state_init(&state, self, string, start, end);
+    if (!string)
+        return NULL;
 
-       state.ptr = state.start;
+    state.ptr = state.start;
 
-       if (state.charsize == 1) {
-               status = sre_match(&state, PatternObject_GetCode(self));
-       } else {
+    if (state.charsize == 1) {
+        status = sre_match(&state, PatternObject_GetCode(self));
+    } else {
 #if defined(HAVE_UNICODE)
-               status = sre_umatch(&state, PatternObject_GetCode(self));
+        status = sre_umatch(&state, PatternObject_GetCode(self));
 #endif
-       }
+    }
 
-       state_fini(&state);
+    state_fini(&state);
 
-       return pattern_new_match(self, &state, string, status);
+    return pattern_new_match(self, &state, status);
 }
 
 static PyObject*
 pattern_search(PatternObject* self, PyObject* args)
 {
-       SRE_STATE state;
-       PyObject* string;
-       int status;
+    SRE_STATE state;
+    int status;
 
-       string = state_init(&state, self, args);
-       if (!string)
-               return NULL;
+    PyObject* string;
+    int start = 0;
+    int end = INT_MAX;
+    if (!PyArg_ParseTuple(args, "O|ii:search", &string, &start, &end))
+        return NULL;
+
+    string = state_init(&state, self, string, start, end);
+    if (!string)
+        return NULL;
 
-       if (state.charsize == 1) {
-               status = sre_search(&state, PatternObject_GetCode(self));
-       } else {
+    if (state.charsize == 1) {
+        status = sre_search(&state, PatternObject_GetCode(self));
+    } else {
 #if defined(HAVE_UNICODE)
-               status = sre_usearch(&state, PatternObject_GetCode(self));
+        status = sre_usearch(&state, PatternObject_GetCode(self));
 #endif
-       }
+    }
 
-       state_fini(&state);
+    state_fini(&state);
 
-       return pattern_new_match(self, &state, string, status);
+    return pattern_new_match(self, &state, status);
 }
 
 static PyObject*
@@ -1464,11 +1486,11 @@ call(char* function, PyObject* args)
 static PyObject*
 pattern_sub(PatternObject* self, PyObject* args)
 {
-       PyObject* template;
-       PyObject* string;
+    PyObject* template;
+    PyObject* string;
     PyObject* count = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "OO|O", &template, &string, &count))
-               return NULL;
+    if (!PyArg_ParseTuple(args, "OO|O:sub", &template, &string, &count))
+        return NULL;
 
     /* delegate to Python code */
     return call("_sub", Py_BuildValue("OOOO", self, template, string, count));
@@ -1477,11 +1499,11 @@ pattern_sub(PatternObject* self, PyObject* args)
 static PyObject*
 pattern_subn(PatternObject* self, PyObject* args)
 {
-       PyObject* template;
-       PyObject* string;
+    PyObject* template;
+    PyObject* string;
     PyObject* count = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "OO|O", &template, &string, &count))
-               return NULL;
+    if (!PyArg_ParseTuple(args, "OO|O:subn", &template, &string, &count))
+        return NULL;
 
     /* delegate to Python code */
     return call("_subn", Py_BuildValue("OOOO", self, template, string, count));
@@ -1490,10 +1512,10 @@ pattern_subn(PatternObject* self, PyObject* args)
 static PyObject*
 pattern_split(PatternObject* self, PyObject* args)
 {
-       PyObject* string;
+    PyObject* string;
     PyObject* maxsplit = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "O|O", &string, &maxsplit))
-               return NULL;
+    if (!PyArg_ParseTuple(args, "O|O:split", &string, &maxsplit))
+        return NULL;
 
     /* delegate to Python code */
     return call("_split", Py_BuildValue("OOO", self, string, maxsplit));
@@ -1502,31 +1524,36 @@ pattern_split(PatternObject* self, PyObject* args)
 static PyObject*
 pattern_findall(PatternObject* self, PyObject* args)
 {
-       SRE_STATE state;
-       PyObject* string;
-       PyObject* list;
-       int status;
+    SRE_STATE state;
+    PyObject* list;
+    int status;
     int i;
 
-       string = state_init(&state, self, args);
-       if (!string)
-               return NULL;
+    PyObject* string;
+    int start = 0;
+    int end = INT_MAX;
+    if (!PyArg_ParseTuple(args, "O|ii:findall", &string, &start, &end))
+        return NULL;
+
+    string = state_init(&state, self, string, start, end);
+    if (!string)
+        return NULL;
 
-       list = PyList_New(0);
+    list = PyList_New(0);
 
-       while (state.start <= state.end) {
+    while (state.start <= state.end) {
 
-               PyObject* item;
-               
-               state.ptr = state.start;
+        PyObject* item;
+        
+        state.ptr = state.start;
 
-               if (state.charsize == 1) {
-                       status = sre_search(&state, PatternObject_GetCode(self));
-               } else {
+        if (state.charsize == 1) {
+            status = sre_search(&state, PatternObject_GetCode(self));
+        } else {
 #if defined(HAVE_UNICODE)
-                       status = sre_usearch(&state, PatternObject_GetCode(self));
+            status = sre_usearch(&state, PatternObject_GetCode(self));
 #endif
-               }
+        }
 
         if (status > 0) {
 
@@ -1567,46 +1594,46 @@ pattern_findall(PatternObject* self, PyObject* args)
                 goto error;
             }
 
-                       if (state.ptr == state.start)
-                               state.start = (void*) ((char*) state.ptr + state.charsize);
+            if (state.ptr == state.start)
+                state.start = (void*) ((char*) state.ptr + state.charsize);
             else
                 state.start = state.ptr;
 
-               } else {
+        } else {
 
             if (status == 0)
                 break;
 
-                       /* internal error */
-                       PyErr_SetString(
-                               PyExc_RuntimeError,
-                               "internal error in regular expression engine"
-                               );
-                       goto error;
+            /* internal error */
+            PyErr_SetString(
+                PyExc_RuntimeError,
+                "internal error in regular expression engine"
+                );
+            goto error;
 
-               }
-       }
+        }
+    }
 
-       state_fini(&state);
-       return list;
+    state_fini(&state);
+    return list;
 
 error:
     Py_DECREF(list);
-       state_fini(&state);
-       return NULL;
-       
+    state_fini(&state);
+    return NULL;
+    
 }
 
 static PyMethodDef pattern_methods[] = {
-       {"match", (PyCFunction) pattern_match, 1},
-       {"search", (PyCFunction) pattern_search, 1},
-       {"sub", (PyCFunction) pattern_sub, 1},
-       {"subn", (PyCFunction) pattern_subn, 1},
-       {"split", (PyCFunction) pattern_split, 1},
-       {"findall", (PyCFunction) pattern_findall, 1},
+    {"match", (PyCFunction) pattern_match, 1},
+    {"search", (PyCFunction) pattern_search, 1},
+    {"sub", (PyCFunction) pattern_sub, 1},
+    {"subn", (PyCFunction) pattern_subn, 1},
+    {"split", (PyCFunction) pattern_split, 1},
+    {"findall", (PyCFunction) pattern_findall, 1},
     /* experimental */
-       {"scanner", (PyCFunction) pattern_scanner, 1},
-       {NULL, NULL}
+    {"scanner", (PyCFunction) pattern_scanner, 1},
+    {NULL, NULL}
 };
 
 static PyObject*  
@@ -1614,41 +1641,41 @@ pattern_getattr(PatternObject* self, char* name)
 {
     PyObject* res;
 
-       res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
+    res = Py_FindMethod(pattern_methods, (PyObject*) self, name);
 
-       if (res)
-               return res;
+    if (res)
+        return res;
 
-       PyErr_Clear();
+    PyErr_Clear();
 
     /* attributes */
-       if (!strcmp(name, "pattern")) {
+    if (!strcmp(name, "pattern")) {
         Py_INCREF(self->pattern);
-               return self->pattern;
+        return self->pattern;
     }
 
     if (!strcmp(name, "flags"))
-               return Py_BuildValue("i", self->flags);
+        return Py_BuildValue("i", self->flags);
 
     if (!strcmp(name, "groups"))
-               return Py_BuildValue("i", self->groups);
+        return Py_BuildValue("i", self->groups);
 
-       if (!strcmp(name, "groupindex") && self->groupindex) {
+    if (!strcmp(name, "groupindex") && self->groupindex) {
         Py_INCREF(self->groupindex);
-               return self->groupindex;
+        return self->groupindex;
     }
 
-       PyErr_SetString(PyExc_AttributeError, name);
-       return NULL;
+    PyErr_SetString(PyExc_AttributeError, name);
+    return NULL;
 }
 
 statichere PyTypeObject Pattern_Type = {
-       PyObject_HEAD_INIT(NULL)
-       0, "SRE_Pattern",
+    PyObject_HEAD_INIT(NULL)
+    0, "SRE_Pattern",
     sizeof(PatternObject), sizeof(SRE_CODE),
-       (destructor)pattern_dealloc, /*tp_dealloc*/
-       0, /*tp_print*/
-       (getattrfunc)pattern_getattr /*tp_getattr*/
+    (destructor)pattern_dealloc, /*tp_dealloc*/
+    0, /*tp_print*/
+    (getattrfunc)pattern_getattr /*tp_getattr*/
 };
 
 /* -------------------------------------------------------------------- */
@@ -1657,34 +1684,35 @@ statichere PyTypeObject Pattern_Type = {
 static void
 match_dealloc(MatchObject* self)
 {
-       Py_XDECREF(self->string);
-       Py_DECREF(self->pattern);
-       PyObject_DEL(self);
+    Py_XDECREF(self->regs);
+    Py_XDECREF(self->string);
+    Py_DECREF(self->pattern);
+    PyObject_DEL(self);
 }
 
 static PyObject*
 match_getslice_by_index(MatchObject* self, int index, PyObject* def)
 {
-       if (index < 0 || index >= self->groups) {
-               /* raise IndexError if we were given a bad group number */
-               PyErr_SetString(
-                       PyExc_IndexError,
-                       "no such group"
-                       );
-               return NULL;
-       }
+    if (index < 0 || index >= self->groups) {
+        /* raise IndexError if we were given a bad group number */
+        PyErr_SetString(
+            PyExc_IndexError,
+            "no such group"
+            );
+        return NULL;
+    }
 
     index *= 2;
 
-       if (self->string == Py_None || self->mark[index] < 0) {
-               /* return default value if the string or group is undefined */
-               Py_INCREF(def);
-               return def;
-       }
+    if (self->string == Py_None || self->mark[index] < 0) {
+        /* return default value if the string or group is undefined */
+        Py_INCREF(def);
+        return def;
+    }
 
-       return PySequence_GetSlice(
-               self->string, self->mark[index], self->mark[index+1]
-               );
+    return PySequence_GetSlice(
+        self->string, self->mark[index], self->mark[index+1]
+        );
 }
 
 static int
@@ -1692,20 +1720,20 @@ match_getindex(MatchObject* self, PyObject* index)
 {
     int i;
 
-       if (PyInt_Check(index))
+    if (PyInt_Check(index))
         return (int) PyInt_AS_LONG(index);
 
     i = -1;
 
-       if (self->pattern->groupindex) {
-               index = PyObject_GetItem(self->pattern->groupindex, index);
-               if (index) {
+    if (self->pattern->groupindex) {
+        index = PyObject_GetItem(self->pattern->groupindex, index);
+        if (index) {
             if (PyInt_Check(index))
                 i = (int) PyInt_AS_LONG(index);
             Py_DECREF(index);
         } else
             PyErr_Clear();
-       }
+    }
 
     return i;
 }
@@ -1713,115 +1741,115 @@ match_getindex(MatchObject* self, PyObject* index)
 static PyObject*
 match_getslice(MatchObject* self, PyObject* index, PyObject* def)
 {
-       return match_getslice_by_index(self, match_getindex(self, index), def);
+    return match_getslice_by_index(self, match_getindex(self, index), def);
 }
 
 static PyObject*
 match_group(MatchObject* self, PyObject* args)
 {
-       PyObject* result;
-       int i, size;
-
-       size = PyTuple_GET_SIZE(args);
-
-       switch (size) {
-       case 0:
-               result = match_getslice(self, Py_False, Py_None);
-               break;
-       case 1:
-               result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
-               break;
-       default:
-               /* fetch multiple items */
-               result = PyTuple_New(size);
-               if (!result)
-                       return NULL;
-               for (i = 0; i < size; i++) {
-                       PyObject* item = match_getslice(
+    PyObject* result;
+    int i, size;
+
+    size = PyTuple_GET_SIZE(args);
+
+    switch (size) {
+    case 0:
+        result = match_getslice(self, Py_False, Py_None);
+        break;
+    case 1:
+        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
+        break;
+    default:
+        /* fetch multiple items */
+        result = PyTuple_New(size);
+        if (!result)
+            return NULL;
+        for (i = 0; i < size; i++) {
+            PyObject* item = match_getslice(
                 self, PyTuple_GET_ITEM(args, i), Py_None
                 );
-                       if (!item) {
-                               Py_DECREF(result);
-                               return NULL;
-                       }
-                       PyTuple_SET_ITEM(result, i, item);
-               }
-               break;
-       }
-       return result;
+            if (!item) {
+                Py_DECREF(result);
+                return NULL;
+            }
+            PyTuple_SET_ITEM(result, i, item);
+        }
+        break;
+    }
+    return result;
 }
 
 static PyObject*
 match_groups(MatchObject* self, PyObject* args)
 {
-       PyObject* result;
-       int index;
-
-       PyObject* def = Py_None;
-       if (!PyArg_ParseTuple(args, "|O", &def))
-               return NULL;
-
-       result = PyTuple_New(self->groups-1);
-       if (!result)
-               return NULL;
-
-       for (index = 1; index < self->groups; index++) {
-               PyObject* item;
-               item = match_getslice_by_index(self, index, def);
-               if (!item) {
-                       Py_DECREF(result);
-                       return NULL;
-               }
-               PyTuple_SET_ITEM(result, index-1, item);
-       }
-
-       return result;
+    PyObject* result;
+    int index;
+
+    PyObject* def = Py_None;
+    if (!PyArg_ParseTuple(args, "|O:groups", &def))
+        return NULL;
+
+    result = PyTuple_New(self->groups-1);
+    if (!result)
+        return NULL;
+
+    for (index = 1; index < self->groups; index++) {
+        PyObject* item;
+        item = match_getslice_by_index(self, index, def);
+        if (!item) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(result, index-1, item);
+    }
+
+    return result;
 }
 
 static PyObject*
 match_groupdict(MatchObject* self, PyObject* args)
 {
-       PyObject* result;
-       PyObject* keys;
-       int index;
+    PyObject* result;
+    PyObject* keys;
+    int index;
 
-       PyObject* def = Py_None;
-       if (!PyArg_ParseTuple(args, "|O", &def))
-               return NULL;
+    PyObject* def = Py_None;
+    if (!PyArg_ParseTuple(args, "|O:groupdict", &def))
+        return NULL;
 
-       result = PyDict_New();
-       if (!result || !self->pattern->groupindex)
-               return result;
+    result = PyDict_New();
+    if (!result || !self->pattern->groupindex)
+        return result;
 
-       keys = PyMapping_Keys(self->pattern->groupindex);
-       if (!keys) {
+    keys = PyMapping_Keys(self->pattern->groupindex);
+    if (!keys) {
         Py_DECREF(result);
-               return NULL;
+        return NULL;
+    }
+
+    for (index = 0; index < PyList_GET_SIZE(keys); index++) {
+        PyObject* key;
+        PyObject* item;
+        key = PyList_GET_ITEM(keys, index);
+        if (!key) {
+            Py_DECREF(keys);
+            Py_DECREF(result);
+            return NULL;
+        }
+        item = match_getslice(self, key, def);
+        if (!item) {
+            Py_DECREF(key);
+            Py_DECREF(keys);
+            Py_DECREF(result);
+            return NULL;
+        }
+        /* FIXME: <fl> this can fail, right? */
+        PyDict_SetItem(result, key, item);
     }
 
-       for (index = 0; index < PyList_GET_SIZE(keys); index++) {
-               PyObject* key;
-               PyObject* item;
-               key = PyList_GET_ITEM(keys, index);
-               if (!key) {
-                       Py_DECREF(keys);
-                       Py_DECREF(result);
-                       return NULL;
-               }
-               item = match_getslice(self, key, def);
-               if (!item) {
-                       Py_DECREF(key);
-                       Py_DECREF(keys);
-                       Py_DECREF(result);
-                       return NULL;
-               }
-               /* FIXME: <fl> this can fail, right? */
-               PyDict_SetItem(result, key, item);
-       }
-
-       Py_DECREF(keys);
-
-       return result;
+    Py_DECREF(keys);
+
+    return result;
 }
 
 static PyObject*
@@ -1829,26 +1857,26 @@ match_start(MatchObject* self, PyObject* args)
 {
     int index;
 
-       PyObject* index_ = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "|O", &index_))
-               return NULL;
+    PyObject* index_ = Py_False; /* zero */
+    if (!PyArg_ParseTuple(args, "|O:start", &index_))
+        return NULL;
 
     index = match_getindex(self, index_);
 
-       if (index < 0 || index >= self->groups) {
-               PyErr_SetString(
-                       PyExc_IndexError,
-                       "no such group"
-                       );
-               return NULL;
-       }
+    if (index < 0 || index >= self->groups) {
+        PyErr_SetString(
+            PyExc_IndexError,
+            "no such group"
+            );
+        return NULL;
+    }
 
-       if (self->mark[index*2] < 0) {
-               Py_INCREF(Py_None);
-               return Py_None;
-       }
+    if (self->mark[index*2] < 0) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
 
-       return Py_BuildValue("i", self->mark[index*2]);
+    return Py_BuildValue("i", self->mark[index*2]);
 }
 
 static PyObject*
@@ -1856,26 +1884,53 @@ match_end(MatchObject* self, PyObject* args)
 {
     int index;
 
-       PyObject* index_ = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "|O", &index_))
-               return NULL;
+    PyObject* index_ = Py_False; /* zero */
+    if (!PyArg_ParseTuple(args, "|O:end", &index_))
+        return NULL;
 
     index = match_getindex(self, index_);
 
-       if (index < 0 || index >= self->groups) {
-               PyErr_SetString(
-                       PyExc_IndexError,
-                       "no such group"
-                       );
-               return NULL;
-       }
+    if (index < 0 || index >= self->groups) {
+        PyErr_SetString(
+            PyExc_IndexError,
+            "no such group"
+            );
+        return NULL;
+    }
 
-       if (self->mark[index*2] < 0) {
-               Py_INCREF(Py_None);
-               return Py_None;
-       }
+    if (self->mark[index*2] < 0) {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+
+    return Py_BuildValue("i", self->mark[index*2+1]);
+}
+
+LOCAL(PyObject*)
+_pair(int i1, int i2)
+{
+    PyObject* pair;
+    PyObject* item;
+
+    pair = PyTuple_New(2);
+    if (!pair)
+        return NULL;
+
+    item = PyInt_FromLong(i1);
+    if (!item)
+        goto error;
+    PyTuple_SET_ITEM(pair, 0, item);
+
+    item = PyInt_FromLong(i2);
+    if (!item)
+        goto error;
+    PyTuple_SET_ITEM(pair, 1, item);
 
-       return Py_BuildValue("i", self->mark[index*2+1]);
+    return pair;
+
+  error:
+    Py_DECREF(pair);
+    return NULL;
 }
 
 static PyObject*
@@ -1883,52 +1938,77 @@ match_span(MatchObject* self, PyObject* args)
 {
     int index;
 
-       PyObject* index_ = Py_False; /* zero */
-       if (!PyArg_ParseTuple(args, "|O", &index_))
-               return NULL;
+    PyObject* index_ = Py_False; /* zero */
+    if (!PyArg_ParseTuple(args, "|O:span", &index_))
+        return NULL;
 
     index = match_getindex(self, index_);
 
-       if (index < 0 || index >= self->groups) {
-               PyErr_SetString(
-                       PyExc_IndexError,
-                       "no such group"
-                       );
-               return NULL;
-       }
-
-       if (self->mark[index*2] < 0) {
-               Py_INCREF(Py_None);
-               Py_INCREF(Py_None);
+    if (index < 0 || index >= self->groups) {
+        PyErr_SetString(
+            PyExc_IndexError,
+            "no such group"
+            );
+        return NULL;
+    }
+
+    if (self->mark[index*2] < 0) {
+        Py_INCREF(Py_None);
+        Py_INCREF(Py_None);
         return Py_BuildValue("OO", Py_None, Py_None);
-       }
+    }
 
-       return Py_BuildValue("ii", self->mark[index*2], self->mark[index*2+1]);
+    return _pair(self->mark[index*2], self->mark[index*2+1]);
+}
+
+static PyObject*
+match_regs(MatchObject* self)
+{
+    PyObject* regs;
+    PyObject* item;
+    int index;
+
+    regs = PyTuple_New(self->groups);
+    if (!regs)
+        return NULL;
+
+    for (index = 0; index < self->groups; index++) {
+        item = _pair(self->mark[index*2], self->mark[index*2+1]);
+        if (!item) {
+            Py_DECREF(regs);
+            return NULL;
+        }
+        PyTuple_SET_ITEM(regs, index, item);
+    }
+
+    Py_INCREF(regs);
+    self->regs = regs;
+
+    return regs;
 }
 
 static PyMethodDef match_methods[] = {
-       {"group", (PyCFunction) match_group, 1},
-       {"start", (PyCFunction) match_start, 1},
-       {"end", (PyCFunction) match_end, 1},
-       {"span", (PyCFunction) match_span, 1},
-       {"groups", (PyCFunction) match_groups, 1},
-       {"groupdict", (PyCFunction) match_groupdict, 1},
-       {NULL, NULL}
+    {"group", (PyCFunction) match_group, 1},
+    {"start", (PyCFunction) match_start, 1},
+    {"end", (PyCFunction) match_end, 1},
+    {"span", (PyCFunction) match_span, 1},
+    {"groups", (PyCFunction) match_groups, 1},
+    {"groupdict", (PyCFunction) match_groupdict, 1},
+    {NULL, NULL}
 };
 
 static PyObject*  
 match_getattr(MatchObject* self, char* name)
 {
-       PyObject* res;
+    PyObject* res;
 
-       res = Py_FindMethod(match_methods, (PyObject*) self, name);
-       if (res)
-               return res;
+    res = Py_FindMethod(match_methods, (PyObject*) self, name);
+    if (res)
+        return res;
 
-       PyErr_Clear();
+    PyErr_Clear();
 
-       if (!strcmp(name, "lastindex")) {
-        /* experimental */
+    if (!strcmp(name, "lastindex")) {
         if (self->lastindex >= 0)
             return Py_BuildValue("i", self->lastindex);
         Py_INCREF(Py_None);
@@ -1936,7 +2016,6 @@ match_getattr(MatchObject* self, char* name)
     }
 
     if (!strcmp(name, "lastgroup")) {
-        /* experimental */
         if (self->pattern->indexgroup && self->lastindex >= 0) {
             PyObject* result = PySequence_GetItem(
                 self->pattern->indexgroup, self->lastindex
@@ -1949,36 +2028,49 @@ match_getattr(MatchObject* self, char* name)
         return Py_None;
     }
 
-       if (!strcmp(name, "string")) {
-        Py_INCREF(self->string);
-               return self->string;
+    if (!strcmp(name, "string")) {
+        if (self->string) {
+            Py_INCREF(self->string);
+            return self->string;
+        } else {
+            Py_INCREF(Py_None);
+            return Py_None;
+        }
+    }
+
+    if (!strcmp(name, "regs")) {
+        if (self->regs) {
+            Py_INCREF(self->regs);
+            return self->regs;
+        } else
+            return match_regs(self);
     }
 
-       if (!strcmp(name, "re")) {
+    if (!strcmp(name, "re")) {
         Py_INCREF(self->pattern);
-               return (PyObject*) self->pattern;
+        return (PyObject*) self->pattern;
     }
 
-       if (!strcmp(name, "pos"))
-               return Py_BuildValue("i", self->pos);
+    if (!strcmp(name, "pos"))
+        return Py_BuildValue("i", self->pos);
 
-       if (!strcmp(name, "endpos"))
-               return Py_BuildValue("i", self->endpos);
+    if (!strcmp(name, "endpos"))
+        return Py_BuildValue("i", self->endpos);
 
-       PyErr_SetString(PyExc_AttributeError, name);
-       return NULL;
+    PyErr_SetString(PyExc_AttributeError, name);
+    return NULL;
 }
 
 /* FIXME: implement setattr("string", None) as a special case (to
    detach the associated string, if any */
 
 statichere PyTypeObject Match_Type = {
-       PyObject_HEAD_INIT(NULL)
-       0, "SRE_Match",
-       sizeof(MatchObject), sizeof(int),
-       (destructor)match_dealloc, /*tp_dealloc*/
-       0, /*tp_print*/
-       (getattrfunc)match_getattr /*tp_getattr*/
+    PyObject_HEAD_INIT(NULL)
+    0, "SRE_Match",
+    sizeof(MatchObject), sizeof(int),
+    (destructor)match_dealloc, /*tp_dealloc*/
+    0, /*tp_print*/
+    (getattrfunc)match_getattr /*tp_getattr*/
 };
 
 /* -------------------------------------------------------------------- */
@@ -1987,10 +2079,9 @@ statichere PyTypeObject Match_Type = {
 static void
 scanner_dealloc(ScannerObject* self)
 {
-       state_fini(&self->state);
-    Py_DECREF(self->string);
+    state_fini(&self->state);
     Py_DECREF(self->pattern);
-       PyObject_DEL(self);
+    PyObject_DEL(self);
 }
 
 static PyObject*
@@ -2000,6 +2091,7 @@ scanner_match(ScannerObject* self, PyObject* args)
     PyObject* match;
     int status;
 
+    state->lastindex = -1;
     state->ptr = state->start;
 
     if (state->charsize == 1) {
@@ -2011,7 +2103,7 @@ scanner_match(ScannerObject* self, PyObject* args)
     }
 
     match = pattern_new_match((PatternObject*) self->pattern,
-                               state, self->string, status);
+                               state, status);
 
     if (status == 0 || state->ptr == state->start)
         state->start = (void*) ((char*) state->ptr + state->charsize);
@@ -2029,6 +2121,7 @@ scanner_search(ScannerObject* self, PyObject* args)
     PyObject* match;
     int status;
 
+    state->lastindex = -1;
     state->ptr = state->start;
 
     if (state->charsize == 1) {
@@ -2040,7 +2133,7 @@ scanner_search(ScannerObject* self, PyObject* args)
     }
 
     match = pattern_new_match((PatternObject*) self->pattern,
-                               state, self->string, status);
+                               state, status);
 
     if (status == 0 || state->ptr == state->start)
         state->start = (void*) ((char*) state->ptr + state->charsize);
@@ -2051,46 +2144,46 @@ scanner_search(ScannerObject* self, PyObject* args)
 }
 
 static PyMethodDef scanner_methods[] = {
-       {"match", (PyCFunction) scanner_match, 0},
-       {"search", (PyCFunction) scanner_search, 0},
-       {NULL, NULL}
+    {"match", (PyCFunction) scanner_match, 0},
+    {"search", (PyCFunction) scanner_search, 0},
+    {NULL, NULL}
 };
 
 static PyObject*  
 scanner_getattr(ScannerObject* self, char* name)
 {
-       PyObject* res;
+    PyObject* res;
 
-       res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
-       if (res)
-               return res;
+    res = Py_FindMethod(scanner_methods, (PyObject*) self, name);
+    if (res)
+        return res;
 
-       PyErr_Clear();
+    PyErr_Clear();
 
-       /* attributes */
-       if (!strcmp(name, "pattern")) {
+    /* attributes */
+    if (!strcmp(name, "pattern")) {
         Py_INCREF(self->pattern);
-               return self->pattern;
+        return self->pattern;
     }
 
-       PyErr_SetString(PyExc_AttributeError, name);
-       return NULL;
+    PyErr_SetString(PyExc_AttributeError, name);
+    return NULL;
 }
 
 statichere PyTypeObject Scanner_Type = {
-       PyObject_HEAD_INIT(NULL)
-       0, "SRE_Scanner",
-       sizeof(ScannerObject), 0,
-       (destructor)scanner_dealloc, /*tp_dealloc*/
-       0, /*tp_print*/
-       (getattrfunc)scanner_getattr, /*tp_getattr*/
+    PyObject_HEAD_INIT(NULL)
+    0, "SRE_Scanner",
+    sizeof(ScannerObject), 0,
+    (destructor)scanner_dealloc, /*tp_dealloc*/
+    0, /*tp_print*/
+    (getattrfunc)scanner_getattr, /*tp_getattr*/
 };
 
 static PyMethodDef _functions[] = {
-       {"compile", _compile, 1},
-       {"getcodesize", sre_codesize, 1},
-       {"getlower", sre_getlower, 1},
-       {NULL, NULL}
+    {"compile", _compile, 1},
+    {"getcodesize", sre_codesize, 1},
+    {"getlower", sre_getlower, 1},
+    {NULL, NULL}
 };
 
 void
@@ -2099,11 +2192,11 @@ __declspec(dllexport)
 #endif
 init_sre(void)
 {
-       /* Patch object types */
-       Pattern_Type.ob_type = Match_Type.ob_type =
+    /* Patch object types */
+    Pattern_Type.ob_type = Match_Type.ob_type =
         Scanner_Type.ob_type = &PyType_Type;
 
-       Py_InitModule("_" MODULE, _functions);
+    Py_InitModule("_" MODULE, _functions);
 }
 
 #endif /* !defined(SRE_RECURSIVE) */
index 1c4bf687283f38b005ebacd39807deb3435f6c33..a62b9171d99c3e96146822b0ca59f3623967a992 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ *
  * Secret Labs' Regular Expression Engine
  *
  * regular expression matching engine
@@ -33,6 +34,7 @@ typedef struct {
 typedef struct {
     PyObject_VAR_HEAD
     PyObject* string; /* link to the target string */
+    PyObject* regs; /* cached list of matching spans */
     PatternObject* pattern; /* link to the regex (pattern) object */
     int pos, endpos; /* current target slice */
     int lastindex; /* last index marker seen by the engine (-1 if none) */
@@ -60,6 +62,9 @@ typedef struct {
     void* beginning; /* start of original string */
     void* start; /* start of current slice */
     void* end; /* end of original string */
+    /* attributes for the match object */
+    PyObject* string;
+    int pos, endpos;
     /* character size */
     int charsize;
     /* registers */
@@ -78,7 +83,6 @@ typedef struct {
     /* scanner (internal helper object) */
     PyObject_HEAD
     PyObject* pattern;
-    PyObject* string;
     SRE_STATE state;
 } ScannerObject;