# internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
- if op is ANY:
- if flags & SRE_FLAG_DOTALL:
- emit(OPCODES[op])
- else:
- emit(OPCODES[CATEGORY])
- emit(CHCODES[CATEGORY_NOT_LINEBREAK])
- elif op in (SUCCESS, FAILURE):
- emit(OPCODES[op])
- elif op is AT:
- emit(OPCODES[op])
- if flags & SRE_FLAG_MULTILINE:
- emit(ATCODES[AT_MULTILINE[av]])
- else:
- emit(ATCODES[av])
- elif op is BRANCH:
- emit(OPCODES[op])
- tail = []
- for av in av[1]:
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[JUMP])
- tail.append(len(code)); emit(0)
- code[skip] = len(code) - skip
- emit(0) # end of branch
- for tail in tail:
- code[tail] = len(code) - tail
- elif op is CALL:
- emit(OPCODES[op])
- skip = len(code); emit(0)
- _compile(code, av, flags)
- emit(OPCODES[SUCCESS])
- code[skip] = len(code) - skip
- elif op is CATEGORY:
- emit(OPCODES[op])
- if flags & SRE_FLAG_LOCALE:
- emit(CHCODES[CH_LOCALE[av]])
- elif flags & SRE_FLAG_UNICODE:
- emit(CHCODES[CH_UNICODE[av]])
- else:
- emit(CHCODES[av])
- elif op is GROUP:
+ if op in (LITERAL, NOT_LITERAL):
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
else:
emit(OPCODES[op])
- emit(av-1)
+ emit(ord(av))
elif op is IN:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
raise error, "internal: unsupported set operator"
emit(OPCODES[FAILURE])
code[skip] = len(code) - skip
- elif op in (LITERAL, NOT_LITERAL):
- if flags & SRE_FLAG_IGNORECASE:
- emit(OPCODES[OP_IGNORE[op]])
- else:
+ elif op is ANY:
+ if flags & SRE_FLAG_DOTALL:
emit(OPCODES[op])
- emit(ord(av))
- elif op is MARK:
- emit(OPCODES[op])
- emit(av)
+ else:
+ emit(OPCODES[CATEGORY])
+ emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
emit(OPCODES[REPEAT])
if group:
emit(OPCODES[MARK])
emit((group-1)*2+1)
+ elif op in (SUCCESS, FAILURE):
+ emit(OPCODES[op])
+ elif op in (ASSERT, ASSERT_NOT, CALL):
+ emit(OPCODES[op])
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[SUCCESS])
+ code[skip] = len(code) - skip
+ elif op is AT:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_MULTILINE:
+ emit(ATCODES[AT_MULTILINE[av]])
+ else:
+ emit(ATCODES[av])
+ elif op is BRANCH:
+ emit(OPCODES[op])
+ tail = []
+ for av in av[1]:
+ skip = len(code); emit(0)
+ _compile(code, av, flags)
+ emit(OPCODES[JUMP])
+ tail.append(len(code)); emit(0)
+ code[skip] = len(code) - skip
+ emit(0) # end of branch
+ for tail in tail:
+ code[tail] = len(code) - tail
+ elif op is CATEGORY:
+ emit(OPCODES[op])
+ if flags & SRE_FLAG_LOCALE:
+ emit(CHCODES[CH_LOCALE[av]])
+ elif flags & SRE_FLAG_UNICODE:
+ emit(CHCODES[CH_UNICODE[av]])
+ else:
+ emit(CHCODES[av])
+ elif op is GROUP:
+ if flags & SRE_FLAG_IGNORECASE:
+ emit(OPCODES[OP_IGNORE[op]])
+ else:
+ emit(OPCODES[op])
+ emit(av-1)
+ elif op is MARK:
+ emit(OPCODES[op])
+ emit(av)
else:
raise ValueError, ("unsupported operand type", op)
ANY = "any"
ASSERT = "assert"
+ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
CALL = "call"
FAILURE, SUCCESS,
ANY,
- ASSERT,
+ ASSERT, ASSERT_NOT,
AT,
BRANCH,
CALL,
d = {}
i = 0
for item in list:
- d[item] = i
- i = i + 1
+ d[item] = i
+ i = i + 1
return d
OPCODES = makedict(OPCODES)
if __name__ == "__main__":
import string
def dump(f, d, prefix):
- items = d.items()
- items.sort(lambda a, b: cmp(a[1], b[1]))
- for k, v in items:
- f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
+ items = d.items()
+ items.sort(lambda a, b: cmp(a[1], b[1]))
+ for k, v in items:
+ f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
- f.write("/* generated from sre_constants.py */\n")
+ f.write("""\
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py. If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
+""")
+
dump(f, OPCODES, "SRE_OP")
dump(f, ATCODES, "SRE")
dump(f, CHCODES, "SRE")
if source.next is None or source.next == ")":
break
source.get()
+ elif source.next in ("=", "!"):
+ # lookahead assertions
+ char = source.get()
+ b = []
+ while 1:
+ p = _parse(source, state, flags)
+ if source.next == ")":
+ if b:
+ b.append(p)
+ p = _branch(state, b)
+ if char == "=":
+ subpattern.append((ASSERT, p))
+ else:
+ subpattern.append((ASSERT_NOT, p))
+ break
+ elif source.match("|"):
+ b.append(p)
+ else:
+ raise error, "pattern not properly closed"
else:
# flags
while FLAGS.has_key(source.next):
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
=== grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a'
=== grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A'
-=== Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad')
-=== Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad')
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
* 00-06-28 fl fixed findall (0.9.1)
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
* 00-06-30 fl tuning, fast search (0.9.3)
+ * 00-06-30 fl added assert (lookahead) primitives (0.9.4)
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
#ifndef SRE_RECURSIVE
-char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB ";
+char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB ";
#include "Python.h"
pattern += pattern[0];
break;
-#if 0
- case SRE_OP_CALL:
- /* match subpattern, without backtracking */
+ case SRE_OP_ASSERT:
+ /* assert subpattern */
/* args: <skip> <pattern> */
- TRACE(("%8d: subpattern\n", PTR(ptr)));
+ TRACE(("%8d: assert subpattern\n", PTR(ptr)));
state->ptr = ptr;
i = SRE_MATCH(state, pattern + 1);
if (i < 0)
if (!i)
goto failure;
pattern += pattern[0];
- ptr = state->ptr;
break;
-#endif
+
+ case SRE_OP_ASSERT_NOT:
+ /* assert not subpattern */
+ /* args: <skip> <pattern> */
+ TRACE(("%8d: assert not subpattern\n", PTR(ptr)));
+ state->ptr = ptr;
+ i = SRE_MATCH(state, pattern + 1);
+ if (i < 0)
+ return i;
+ if (i)
+ goto failure;
+ pattern += pattern[0];
+ break;
#if 0
case SRE_OP_MAX_REPEAT_ONE:
-/* generated from sre_constants.py */
+/*
+ * Secret Labs' Regular Expression Engine
+ *
+ * regular expression matching engine
+ *
+ * NOTE: This file is generated by sre_constants.py. If you need
+ * to change anything in here, edit sre_constants.py and run it.
+ *
+ * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
+ *
+ * See the _sre.c file for information on usage and redistribution.
+ */
+
#define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2
#define SRE_OP_ASSERT 3
-#define SRE_OP_AT 4
-#define SRE_OP_BRANCH 5
-#define SRE_OP_CALL 6
-#define SRE_OP_CATEGORY 7
-#define SRE_OP_GROUP 8
-#define SRE_OP_GROUP_IGNORE 9
-#define SRE_OP_IN 10
-#define SRE_OP_IN_IGNORE 11
-#define SRE_OP_INFO 12
-#define SRE_OP_JUMP 13
-#define SRE_OP_LITERAL 14
-#define SRE_OP_LITERAL_IGNORE 15
-#define SRE_OP_MARK 16
-#define SRE_OP_MAX_REPEAT 17
-#define SRE_OP_MAX_REPEAT_ONE 18
-#define SRE_OP_MIN_REPEAT 19
-#define SRE_OP_NOT_LITERAL 20
-#define SRE_OP_NOT_LITERAL_IGNORE 21
-#define SRE_OP_NEGATE 22
-#define SRE_OP_RANGE 23
-#define SRE_OP_REPEAT 24
+#define SRE_OP_ASSERT_NOT 4
+#define SRE_OP_AT 5
+#define SRE_OP_BRANCH 6
+#define SRE_OP_CALL 7
+#define SRE_OP_CATEGORY 8
+#define SRE_OP_GROUP 9
+#define SRE_OP_GROUP_IGNORE 10
+#define SRE_OP_IN 11
+#define SRE_OP_IN_IGNORE 12
+#define SRE_OP_INFO 13
+#define SRE_OP_JUMP 14
+#define SRE_OP_LITERAL 15
+#define SRE_OP_LITERAL_IGNORE 16
+#define SRE_OP_MARK 17
+#define SRE_OP_MAX_REPEAT 18
+#define SRE_OP_MAX_REPEAT_ONE 19
+#define SRE_OP_MIN_REPEAT 20
+#define SRE_OP_NOT_LITERAL 21
+#define SRE_OP_NOT_LITERAL_IGNORE 22
+#define SRE_OP_NEGATE 23
+#define SRE_OP_RANGE 24
+#define SRE_OP_REPEAT 25
#define SRE_AT_BEGINNING 0
#define SRE_AT_BEGINNING_LINE 1
#define SRE_AT_BOUNDARY 2