]> granicus.if.org Git - python/commitdiff
final 0.9.8 updates:
author Fredrik Lundh <fredrik@pythonware.com>
Tue, 1 Aug 2000 22:47:49 +0000 (22:47 +0000)
committer Fredrik Lundh <fredrik@pythonware.com>
Tue, 1 Aug 2000 22:47:49 +0000 (22:47 +0000)
-- added REPEAT_ONE operator
-- added ANY_ALL operator (used to represent "(?s).")

Lib/sre.py
Lib/sre_compile.py
Lib/sre_constants.py
Lib/sre_parse.py
Modules/_sre.c
Modules/sre_constants.h

index 3e125a783a48df2171a1c9fd951d7b594935eff9..edfefc12b77a6f1f8eb8bd2beaa24f32c4c088ed 100644 (file)
@@ -98,7 +98,10 @@ def _compile(pattern, flags=0):
         return _cache[key]
     except KeyError:
         pass
-    p = sre_compile.compile(pattern, flags)
+    try:
+        p = sre_compile.compile(pattern, flags)
+    except error, v:
+        raise error, v # invalid expression
     if len(_cache) >= _MAXCACHE:
         _cache.clear()
     _cache[key] = p
index 8fdcecf953ab456b0eacf637932579c4617ccd01..abd619e1e9bd7769f411fe3b286484818f2551cf 100644 (file)
@@ -73,6 +73,13 @@ def _charset(charset, fixup=None):
         return out
     return charset
 
+def _simple(av):
+    # check if av is a "simple" operator
+    lo, hi = av[2].getwidth()
+    if lo == 0:
+        raise error, "nothing to repeat"
+    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
+
 def _compile(code, pattern, flags):
     # internal: compile a (sub)pattern
     emit = code.append
@@ -116,10 +123,9 @@ def _compile(code, pattern, flags):
             code[skip] = len(code) - skip
         elif op is ANY:
             if flags & SRE_FLAG_DOTALL:
-                emit(OPCODES[op])
+                emit(OPCODES[ANY_ALL])
             else:
-                emit(OPCODES[CATEGORY])
-                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
+                emit(OPCODES[ANY])
         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
             if flags & SRE_FLAG_TEMPLATE:
                 raise error, "internal: unsupported template operator"
@@ -130,30 +136,25 @@ def _compile(code, pattern, flags):
                 _compile(code, av[2], flags)
                 emit(OPCODES[SUCCESS])
                 code[skip] = len(code) - skip
+            elif _simple(av) and op == MAX_REPEAT:
+                emit(OPCODES[REPEAT_ONE])
+                skip = len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(OPCODES[SUCCESS])
+                code[skip] = len(code) - skip
             else:
-                lo, hi = av[2].getwidth()
-                if lo == 0:
-                    raise error, "nothing to repeat"
-                if 0 and lo == hi == 1 and op is MAX_REPEAT:
-                    # FIXME: <fl> fast and wrong (but we'll fix that)
-                    emit(OPCODES[REPEAT_ONE])
-                    skip = len(code); emit(0)
-                    emit(av[0])
-                    emit(av[1])
-                    _compile(code, av[2], flags)
-                    emit(OPCODES[SUCCESS])
-                    code[skip] = len(code) - skip
+                emit(OPCODES[REPEAT])
+                skip = len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                code[skip] = len(code) - skip
+                if op == MAX_REPEAT:
+                    emit(OPCODES[MAX_UNTIL])
                 else:
-                    emit(OPCODES[REPEAT])
-                    skip = len(code); emit(0)
-                    emit(av[0])
-                    emit(av[1])
-                    _compile(code, av[2], flags)
-                    code[skip] = len(code) - skip
-                    if op == MAX_REPEAT:
-                        emit(OPCODES[MAX_UNTIL])
-                    else:
-                        emit(OPCODES[MIN_UNTIL])
+                    emit(OPCODES[MIN_UNTIL])
         elif op is SUBPATTERN:
             if av[0]:
                 emit(OPCODES[MARK])
index e5959150df88bca28caeaa21f1bf38a41301fda3..5a20930ce1d51b73577d811e63a587c9e6b4dea6 100644 (file)
@@ -20,6 +20,7 @@ FAILURE = "failure"
 SUCCESS = "success"
 
 ANY = "any"
+ANY_ALL = "any_all"
 ASSERT = "assert"
 ASSERT_NOT = "assert_not"
 AT = "at"
@@ -81,7 +82,7 @@ OPCODES = [
     # failure=0 success=1 (just because it looks better that way :-)
     FAILURE, SUCCESS,
 
-    ANY,
+    ANY, ANY_ALL,
     ASSERT, ASSERT_NOT,
     AT,
     BRANCH,
index 1eec3d3d1920406a7370cf1be3a87e6a6366fa6d..1c1d0d5d44dadcfd157af774b6017dcd0a62c9aa 100644 (file)
@@ -142,7 +142,7 @@ class SubPattern:
                 for av in av[1]:
                     l, h = av.getwidth()
                     i = min(i, l)
-                    j = min(j, h)
+                    j = max(j, h)
                 lo = lo + i
                 hi = hi + j
             elif op is CALL:
index 69bc17114e2cf3d33bb1da1fd9466a442e4ca348..677edb8842e98ed1f022df286d397b088f70ebab 100644 (file)
@@ -448,6 +448,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
     int i, count;
     SRE_REPEAT* rp;
     int lastmark;
+    SRE_CODE chr;
 
     SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
 
@@ -525,8 +526,17 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             break;
 
         case SRE_OP_ANY:
-            /* match anything */
+            /* match anything (except a newline) */
             /* <ANY> */
+            TRACE(("%8d: anything (except newline)\n", PTR(ptr)));
+            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
+                return 0;
+            ptr++;
+            break;
+
+        case SRE_OP_ANY_ALL:
+            /* match anything */
+            /* <ANY_ALL> */
             TRACE(("%8d: anything\n", PTR(ptr)));
             if (ptr >= end)
                 return 0;
@@ -695,60 +705,79 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             TRACE(("%8d: max repeat one {%d,%d}\n", PTR(ptr),
                    pattern[1], pattern[2]));
 
+            if (ptr + pattern[1] > end)
+                return 0; /* cannot match */
+
             count = 0;
 
-            if (pattern[3] == SRE_OP_ANY) {
+            switch (pattern[3]) {
+
+            case SRE_OP_ANY:
+                /* repeated wildcard. */
+                while (count < (int) pattern[2]) {
+                    if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
+                        break;
+                    ptr++;
+                    count++;
+                }
+                break;
+
+            case SRE_OP_ANY_ALL:
                 /* repeated wildcard.  skip to the end of the target
                    string, and backtrack from there */
-                /* FIXME: must look for line endings */
                 if (ptr + pattern[1] > end)
                     return 0; /* cannot match */
                 count = pattern[2];
                 if (count > end - ptr)
                     count = end - ptr;
                 ptr += count;
+                break;
 
-            } else if (pattern[3] == SRE_OP_LITERAL) {
+            case SRE_OP_LITERAL:
                 /* repeated literal */
-                SRE_CODE chr = pattern[4];
+                chr = pattern[4];
                 while (count < (int) pattern[2]) {
                     if (ptr >= end || (SRE_CODE) ptr[0] != chr)
                         break;
                     ptr++;
                     count++;
                 }
+                break;
 
-            } else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
+            case SRE_OP_LITERAL_IGNORE:
                 /* repeated literal */
-                SRE_CODE chr = pattern[4];
+                chr = pattern[4];
                 while (count < (int) pattern[2]) {
                     if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
                         break;
                     ptr++;
                     count++;
                 }
+                break;
 
-            } else if (pattern[3] == SRE_OP_NOT_LITERAL) {
+            case SRE_OP_NOT_LITERAL:
                 /* repeated non-literal */
-                SRE_CODE chr = pattern[4];
+                chr = pattern[4];
                 while (count < (int) pattern[2]) {
                     if (ptr >= end || (SRE_CODE) ptr[0] == chr)
                         break;
                     ptr++;
                     count++;
                 }
-
-            } else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
+                break;
+                
+            case SRE_OP_NOT_LITERAL_IGNORE:
                 /* repeated non-literal */
-                SRE_CODE chr = pattern[4];
+                chr = pattern[4];
                 while (count < (int) pattern[2]) {
                     if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
                         break;
                     ptr++;
                     count++;
                 }
+                break;
 
-            } else if (pattern[3] == SRE_OP_IN) {
+            case SRE_OP_IN:
                 /* repeated set */
                 while (count < (int) pattern[2]) {
                     if (ptr >= end || !SRE_MEMBER(pattern + 5, *ptr))
@@ -756,8 +785,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
                     ptr++;
                     count++;
                 }
+                break;
 
-            } else {
+            default:
                 /* repeated single character pattern */
                 state->ptr = ptr;
                 while (count < (int) pattern[2]) {
@@ -770,6 +800,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
                 }
                 state->ptr = ptr;
                 ptr += count;
+                break;
             }
 
             /* when we arrive here, count contains the number of
@@ -791,7 +822,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
             } else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
                 /* tail starts with a literal. skip positions where
                    the rest of the pattern cannot possibly match */
-                SRE_CODE chr = pattern[pattern[0]+1];
+                chr = pattern[pattern[0]+1];
                 TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
                 for (;;) {
                     TRACE(("%8d: scan for tail match\n", PTR(ptr)));
index 5cfe49570b67b93987d10574002a39d78e48419d..5c55c3dbd91721114b83bd45618fbea823f14568 100644 (file)
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
-#define SRE_OP_ASSERT 3
-#define SRE_OP_ASSERT_NOT 4
-#define SRE_OP_AT 5
-#define SRE_OP_BRANCH 6
-#define SRE_OP_CALL 7
-#define SRE_OP_CATEGORY 8
-#define SRE_OP_CHARSET 9
-#define SRE_OP_GROUPREF 10
-#define SRE_OP_GROUPREF_IGNORE 11
-#define SRE_OP_IN 12
-#define SRE_OP_IN_IGNORE 13
-#define SRE_OP_INFO 14
-#define SRE_OP_JUMP 15
-#define SRE_OP_LITERAL 16
-#define SRE_OP_LITERAL_IGNORE 17
-#define SRE_OP_MARK 18
-#define SRE_OP_MAX_UNTIL 19
-#define SRE_OP_MIN_UNTIL 20
-#define SRE_OP_NOT_LITERAL 21
-#define SRE_OP_NOT_LITERAL_IGNORE 22
-#define SRE_OP_NEGATE 23
-#define SRE_OP_RANGE 24
-#define SRE_OP_REPEAT 25
-#define SRE_OP_REPEAT_ONE 26
-#define SRE_OP_SUBPATTERN 27
+#define SRE_OP_ANY_ALL 3
+#define SRE_OP_ASSERT 4
+#define SRE_OP_ASSERT_NOT 5
+#define SRE_OP_AT 6
+#define SRE_OP_BRANCH 7
+#define SRE_OP_CALL 8
+#define SRE_OP_CATEGORY 9
+#define SRE_OP_CHARSET 10
+#define SRE_OP_GROUPREF 11
+#define SRE_OP_GROUPREF_IGNORE 12
+#define SRE_OP_IN 13
+#define SRE_OP_IN_IGNORE 14
+#define SRE_OP_INFO 15
+#define SRE_OP_JUMP 16
+#define SRE_OP_LITERAL 17
+#define SRE_OP_LITERAL_IGNORE 18
+#define SRE_OP_MARK 19
+#define SRE_OP_MAX_UNTIL 20
+#define SRE_OP_MIN_UNTIL 21
+#define SRE_OP_NOT_LITERAL 22
+#define SRE_OP_NOT_LITERAL_IGNORE 23
+#define SRE_OP_NEGATE 24
+#define SRE_OP_RANGE 25
+#define SRE_OP_REPEAT 26
+#define SRE_OP_REPEAT_ONE 27
+#define SRE_OP_SUBPATTERN 28
 #define SRE_AT_BEGINNING 0
 #define SRE_AT_BEGINNING_LINE 1
 #define SRE_AT_BOUNDARY 2