granicus.if.org Git - python/commitdiff
- fixed lookahead assertions (#10, #11, #12)
author: Fredrik Lundh <fredrik@pythonware.com>
Fri, 30 Jun 2000 10:41:31 +0000 (10:41 +0000)
committer: Fredrik Lundh <fredrik@pythonware.com>
Fri, 30 Jun 2000 10:41:31 +0000 (10:41 +0000)
- untabified sre_constants.py

Lib/sre_compile.py
Lib/sre_constants.py
Lib/sre_parse.py
Lib/test/output/test_sre
Modules/_sre.c
Modules/sre_constants.h

index 9fdc8f395a2dbba6794398a8fbada985d36b7885..0829c00e2796190f84ce32b17979968b87deeef2 100644 (file)
@@ -26,52 +26,12 @@ def _compile(code, pattern, flags):
     # internal: compile a (sub)pattern
     emit = code.append
     for op, av in pattern:
-        if op is ANY:
-            if flags & SRE_FLAG_DOTALL:
-                emit(OPCODES[op])
-            else:
-                emit(OPCODES[CATEGORY])
-                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
-        elif op in (SUCCESS, FAILURE):
-            emit(OPCODES[op])
-        elif op is AT:
-            emit(OPCODES[op])
-            if flags & SRE_FLAG_MULTILINE:
-                emit(ATCODES[AT_MULTILINE[av]])
-            else:
-                emit(ATCODES[av])
-        elif op is BRANCH:
-            emit(OPCODES[op])
-            tail = []
-            for av in av[1]:
-                skip = len(code); emit(0)
-                _compile(code, av, flags)
-                emit(OPCODES[JUMP])
-                tail.append(len(code)); emit(0)
-                code[skip] = len(code) - skip
-            emit(0) # end of branch
-            for tail in tail:
-                code[tail] = len(code) - tail
-        elif op is CALL:
-            emit(OPCODES[op])
-            skip = len(code); emit(0)
-            _compile(code, av, flags)
-            emit(OPCODES[SUCCESS])
-            code[skip] = len(code) - skip
-        elif op is CATEGORY:
-            emit(OPCODES[op])
-            if flags & SRE_FLAG_LOCALE:
-                emit(CHCODES[CH_LOCALE[av]])
-            elif flags & SRE_FLAG_UNICODE:
-                emit(CHCODES[CH_UNICODE[av]])
-            else:
-                emit(CHCODES[av])
-        elif op is GROUP:
+        if op in (LITERAL, NOT_LITERAL):
             if flags & SRE_FLAG_IGNORECASE:
                 emit(OPCODES[OP_IGNORE[op]])
             else:
                 emit(OPCODES[op])
-            emit(av-1)
+            emit(ord(av))
         elif op is IN:
             if flags & SRE_FLAG_IGNORECASE:
                 emit(OPCODES[OP_IGNORE[op]])
@@ -101,15 +61,12 @@ def _compile(code, pattern, flags):
                     raise error, "internal: unsupported set operator"
             emit(OPCODES[FAILURE])
             code[skip] = len(code) - skip
-        elif op in (LITERAL, NOT_LITERAL):
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OPCODES[OP_IGNORE[op]])
-            else:
+        elif op is ANY:
+            if flags & SRE_FLAG_DOTALL:
                 emit(OPCODES[op])
-            emit(ord(av))
-        elif op is MARK:
-            emit(OPCODES[op])
-            emit(av)
+            else:
+                emit(OPCODES[CATEGORY])
+                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
             if flags & SRE_FLAG_TEMPLATE:
                 emit(OPCODES[REPEAT])
@@ -150,6 +107,49 @@ def _compile(code, pattern, flags):
             if group:
                 emit(OPCODES[MARK])
                 emit((group-1)*2+1)
+        elif op in (SUCCESS, FAILURE):
+            emit(OPCODES[op])
+        elif op in (ASSERT, ASSERT_NOT, CALL):
+            emit(OPCODES[op])
+            skip = len(code); emit(0)
+            _compile(code, av, flags)
+            emit(OPCODES[SUCCESS])
+            code[skip] = len(code) - skip
+        elif op is AT:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_MULTILINE:
+                emit(ATCODES[AT_MULTILINE[av]])
+            else:
+                emit(ATCODES[av])
+        elif op is BRANCH:
+            emit(OPCODES[op])
+            tail = []
+            for av in av[1]:
+                skip = len(code); emit(0)
+                _compile(code, av, flags)
+                emit(OPCODES[JUMP])
+                tail.append(len(code)); emit(0)
+                code[skip] = len(code) - skip
+            emit(0) # end of branch
+            for tail in tail:
+                code[tail] = len(code) - tail
+        elif op is CATEGORY:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_LOCALE:
+                emit(CHCODES[CH_LOCALE[av]])
+            elif flags & SRE_FLAG_UNICODE:
+                emit(CHCODES[CH_UNICODE[av]])
+            else:
+                emit(CHCODES[av])
+        elif op is GROUP:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(av-1)
+        elif op is MARK:
+            emit(OPCODES[op])
+            emit(av)
         else:
             raise ValueError, ("unsupported operand type", op)
 
index f5e7894e3bae44bef3b2ccb74669386cb742fdab..45f4f482d2887f10ae6175b23a5de129b7f59976 100644 (file)
@@ -23,6 +23,7 @@ SUCCESS = "success"
 
 ANY = "any"
 ASSERT = "assert"
+ASSERT_NOT = "assert_not"
 AT = "at"
 BRANCH = "branch"
 CALL = "call"
@@ -81,7 +82,7 @@ OPCODES = [
     FAILURE, SUCCESS,
 
     ANY,
-    ASSERT,
+    ASSERT, ASSERT_NOT,
     AT,
     BRANCH,
     CALL,
@@ -121,8 +122,8 @@ def makedict(list):
     d = {}
     i = 0
     for item in list:
-       d[item] = i
-       i = i + 1
+        d[item] = i
+        i = i + 1
     return d
 
 OPCODES = makedict(OPCODES)
@@ -176,12 +177,27 @@ SRE_FLAG_VERBOSE = 64
 if __name__ == "__main__":
     import string
     def dump(f, d, prefix):
-       items = d.items()
-       items.sort(lambda a, b: cmp(a[1], b[1]))
-       for k, v in items:
-           f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+        items = d.items()
+        items.sort(lambda a, b: cmp(a[1], b[1]))
+        for k, v in items:
+            f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
     f = open("sre_constants.h", "w")
-    f.write("/* generated from sre_constants.py */\n")
+    f.write("""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py.  If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
+""")
+
     dump(f, OPCODES, "SRE_OP")
     dump(f, ATCODES, "SRE")
     dump(f, CHCODES, "SRE")
index a6f3082a44ae144d99f3f848356b9bab66861529..d3dbe00041e15166ac11332cfe7ca52e33b0450d 100644 (file)
@@ -470,6 +470,25 @@ def _parse(source, state, flags=0):
                         if source.next is None or source.next == ")":
                             break
                         source.get()
+                elif source.next in ("=", "!"):
+                    # lookahead assertions
+                    char = source.get()
+                    b = []
+                    while 1:
+                        p = _parse(source, state, flags)
+                        if source.next == ")":
+                            if b:
+                                b.append(p)
+                                p = _branch(state, b)
+                            if char == "=":
+                                subpattern.append((ASSERT, p))
+                            else:
+                                subpattern.append((ASSERT_NOT, p))
+                            break
+                        elif source.match("|"):
+                            b.append(p)
+                        else:
+                            raise error, "pattern not properly closed"
                 else:
                     # flags
                     while FLAGS.has_key(source.next):
index 75caa55c380a82ba0864302fd940d91a42e59933..d3732b5214985ede6609e0c6031770ded70d3325 100644 (file)
@@ -6,7 +6,4 @@ test_support -- test failed re module cPickle
 === grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
 === grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
 === grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
-=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
 === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
index 6fcd65ea7393c4909f0d25b9af8028a6919fe59c..22b6c7347c5d2c1ba8540f5b1413add087eafef7 100644 (file)
@@ -20,6 +20,7 @@
  * 00-06-28 fl fixed findall (0.9.1)
  * 00-06-29 fl fixed split, added more scanner features (0.9.2)
  * 00-06-30 fl tuning, fast search (0.9.3)
+ * 00-06-30 fl added assert (lookahead) primitives (0.9.4)
  *
  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
  *
@@ -30,7 +31,7 @@
 
 #ifndef SRE_RECURSIVE
 
-char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
+char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
 
 #include "Python.h"
 
@@ -576,11 +577,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
                        pattern += pattern[0];
                        break;
 
-#if 0
-               case SRE_OP_CALL:
-                       /* match subpattern, without backtracking */
+               case SRE_OP_ASSERT:
+                       /* assert subpattern */
                        /* args: <skip> <pattern> */
-                       TRACE(("%8d: subpattern\n", PTR(ptr)));
+                       TRACE(("%8d: assert subpattern\n", PTR(ptr)));
                        state->ptr = ptr;
                        i = SRE_MATCH(state, pattern + 1);
             if (i < 0)
@@ -588,9 +588,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
             if (!i)
                                goto failure;
                        pattern += pattern[0];
-                       ptr = state->ptr;
                        break;
-#endif
+
+               case SRE_OP_ASSERT_NOT:
+                       /* assert not subpattern */
+                       /* args: <skip> <pattern> */
+                       TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
+                       state->ptr = ptr;
+                       i = SRE_MATCH(state, pattern + 1);
+            if (i < 0)
+                return i;
+            if (i)
+                               goto failure;
+                       pattern += pattern[0];
+                       break;
 
 #if 0
                case SRE_OP_MAX_REPEAT_ONE:
index 01c844881635257ce5883d9df8ac506b7f0ce729..2ec00bada4753d56b5805ed4d717f85bb012f6d5 100644 (file)
@@ -1,29 +1,42 @@
-/* generated from sre_constants.py */
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py.  If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
 #define SRE_OP_ASSERT 3
-#define SRE_OP_AT 4
-#define SRE_OP_BRANCH 5
-#define SRE_OP_CALL 6
-#define SRE_OP_CATEGORY 7
-#define SRE_OP_GROUP 8
-#define SRE_OP_GROUP_IGNORE 9
-#define SRE_OP_IN 10
-#define SRE_OP_IN_IGNORE 11
-#define SRE_OP_INFO 12
-#define SRE_OP_JUMP 13
-#define SRE_OP_LITERAL 14
-#define SRE_OP_LITERAL_IGNORE 15
-#define SRE_OP_MARK 16
-#define SRE_OP_MAX_REPEAT 17
-#define SRE_OP_MAX_REPEAT_ONE 18
-#define SRE_OP_MIN_REPEAT 19
-#define SRE_OP_NOT_LITERAL 20
-#define SRE_OP_NOT_LITERAL_IGNORE 21
-#define SRE_OP_NEGATE 22
-#define SRE_OP_RANGE 23
-#define SRE_OP_REPEAT 24
+#define SRE_OP_ASSERT_NOT 4
+#define SRE_OP_AT 5
+#define SRE_OP_BRANCH 6
+#define SRE_OP_CALL 7
+#define SRE_OP_CATEGORY 8
+#define SRE_OP_GROUP 9
+#define SRE_OP_GROUP_IGNORE 10
+#define SRE_OP_IN 11
+#define SRE_OP_IN_IGNORE 12
+#define SRE_OP_INFO 13
+#define SRE_OP_JUMP 14
+#define SRE_OP_LITERAL 15
+#define SRE_OP_LITERAL_IGNORE 16
+#define SRE_OP_MARK 17
+#define SRE_OP_MAX_REPEAT 18
+#define SRE_OP_MAX_REPEAT_ONE 19
+#define SRE_OP_MIN_REPEAT 20
+#define SRE_OP_NOT_LITERAL 21
+#define SRE_OP_NOT_LITERAL_IGNORE 22
+#define SRE_OP_NEGATE 23
+#define SRE_OP_RANGE 24
+#define SRE_OP_REPEAT 25
 #define SRE_AT_BEGINNING 0
 #define SRE_AT_BEGINNING_LINE 1
 #define SRE_AT_BOUNDARY 2