bumped SRE version number to 2.1. cleaned up and added 1.5.2 compatibility patches

author Fredrik Lundh <fredrik@pythonware.com>

Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)

committer Fredrik Lundh <fredrik@pythonware.com>

Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)
author Fredrik Lundh <fredrik@pythonware.com>
Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)
committer Fredrik Lundh <fredrik@pythonware.com>
Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)
diff --git a/Lib/sre.py b/Lib/sre.py

index 8d03e921a9181bc9d67c50e346ba39a79012e3fc..859ff9e70ddea99e8c79a8332a133aecc57b2cab 100644 (file)
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -181,7 +181,7 @@ def _split(pattern, string, maxsplit=0):
              continue
          append(string[i:b])
          if g and b != e:
-            extend(m.groups())
+            extend(list(m.groups()))
          i = e
          n = n + 1
      append(string[i:])
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py

index 454e4779f8441700b49a2d25ca48caacc77c0c64..7c6eb9f7647de67b8e89dd7eed8fe83651f5bbd4 100644 (file)
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -60,6 +60,12 @@ FLAGS = {
      "u": SRE_FLAG_UNICODE,
  }
  
+try:
+    int("10", 8)
+    atoi = int
+except TypeError:
+    atoi = string.atoi
+
  class Pattern:
      # master pattern object.  keeps track of global attributes
      def __init__(self):
@@ -216,7 +222,7 @@ def isname(name):
  def _group(escape, groups):
      # check if the escape string represents a valid group
      try:
-        gid = int(escape[1:])
+        gid = atoi(escape[1:])
          if gid and gid < groups:
              return gid
      except ValueError:
@@ -239,13 +245,13 @@ def _class_escape(source, escape):
              escape = escape[2:]
              if len(escape) != 2:
                  raise error, "bogus escape: %s" % repr("\\" + escape)
-            return LITERAL, int(escape, 16) & 0xff
+            return LITERAL, atoi(escape, 16) & 0xff
          elif str(escape[1:2]) in OCTDIGITS:
              # octal escape (up to three digits)
              while source.next in OCTDIGITS and len(escape) < 5:
                  escape = escape + source.get()
              escape = escape[1:]
-            return LITERAL, int(escape, 8) & 0xff
+            return LITERAL, atoi(escape, 8) & 0xff
          if len(escape) == 2:
              return LITERAL, ord(escape[1])
      except ValueError:
@@ -267,12 +273,12 @@ def _escape(source, escape, state):
                  escape = escape + source.get()
              if len(escape) != 4:
                  raise ValueError
-            return LITERAL, int(escape[2:], 16) & 0xff
+            return LITERAL, atoi(escape[2:], 16) & 0xff
          elif escape[1:2] == "0":
              # octal escape
              while source.next in OCTDIGITS and len(escape) < 4:
                  escape = escape + source.get()
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return LITERAL, atoi(escape[1:], 8) & 0xff
          elif escape[1:2] in DIGITS:
              # octal escape *or* decimal group reference (sigh)
              here = source.tell()
@@ -282,7 +288,7 @@ def _escape(source, escape, state):
                      source.next in OCTDIGITS):
                      # got three octal digits; this is an octal escape
                      escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    return LITERAL, atoi(escape[1:], 8) & 0xff
              # got at least one decimal digit; this is a group reference
              group = _group(escape, state.groups)
              if group:
@@ -456,9 +462,9 @@ def _parse(source, state):
                      source.seek(here)
                      continue
                  if lo:
-                    min = int(lo)
+                    min = atoi(lo)
                  if hi:
-                    max = int(hi)
+                    max = atoi(hi)
                  if max < min:
                      raise error, "bad repeat interval"
              else:
@@ -646,7 +652,7 @@ def parse_template(source, pattern):
                  if not name:
                      raise error, "bad group name"
                  try:
-                    index = int(name)
+                    index = atoi(name)
                  except ValueError:
                      if not isname(name):
                          raise error, "bad character in group name"
@@ -662,7 +668,7 @@ def parse_template(source, pattern):
                      if group:
                          if (s.next not in DIGITS or
                              not _group(this + s.next, pattern.groups+1)):
-                            code = MARK, int(group)
+                            code = MARK, group
                              break
                      elif s.next in OCTDIGITS:
                          this = this + s.get()
@@ -670,7 +676,7 @@ def parse_template(source, pattern):
                          break
                  if not code:
                      this = this[1:]
-                    code = LITERAL, int(this[-6:], 8) & 0xff
+                    code = LITERAL, atoi(this[-6:], 8) & 0xff
                  a(code)
              else:
                  try:
diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py

index 5d19d40988dd337a580482486121de179c700ea1..88c0d62e8db8d1ee1ad3e408709df6fbbf3b9727 100644 (file)
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
@@ -325,16 +325,26 @@ for t in tests:
  
              # Try the match on a unicode string, and check that it
              # still succeeds.
-            result=obj.search(unicode(s, "latin-1"))
-            if result==None:
-                print '=== Fails on unicode match', t
+            try:
+                u = unicode(s, "latin-1")
+            except NameError:
+                pass
+            else:
+                result=obj.search(u)
+                if result==None:
+                    print '=== Fails on unicode match', t
  
              # Try the match on a unicode pattern, and check that it
              # still succeeds.
-            obj=sre.compile(unicode(pattern, "latin-1"))
-            result=obj.search(s)
-            if result==None:
-                print '=== Fails on unicode pattern match', t
+            try:
+                u = unicode(pattern, "latin-1")
+            except NameError:
+                pass
+            else:
+                obj=sre.compile(u)
+                result=obj.search(s)
+                if result==None:
+                    print '=== Fails on unicode pattern match', t
  
              # Try the match with the search area limited to the extent
              # of the match and see if it still succeeds.  \B will
diff --git a/Modules/_sre.c b/Modules/_sre.c

index f308dacdc0467b378cd99be240add3304b840b44..3d4054a8c7dac2ab849851f73d476d4c362d3afd 100644 (file)
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -24,7 +24,7 @@
   * 2000-10-24 fl  really fixed assert_not; reset groups in findall
   * 2000-12-21 fl  fixed memory leak in groupdict
   * 2001-01-02 fl  properly reset pointer after failed assertion in MIN_UNTIL
- * 2001-01-15 fl  don't use recursion for unbounded MIN_UTIL; fixed
+ * 2001-01-15 fl  avoid recursion for MIN_UTIL; fixed uppercase literal bug
   * 2001-01-16 fl  fixed memory leak in pattern destructor
   *
   * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
@@ -40,7 +40,7 @@
  
  #ifndef SRE_RECURSIVE
  
-char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
+char copyright[] = " SRE 2.1 Copyright (c) 1997-2001 by Secret Labs AB ";
  
  #include "Python.h"
  
@@ -49,7 +49,9 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
  #include <ctype.h>
  
  /* name of this module, minus the leading underscore */
-#define MODULE "sre"
+#if !defined(SRE_MODULE)
+#define SRE_MODULE "sre"
+#endif
  
  /* defining this one enables tracing */
  #undef VERBOSE
@@ -81,6 +83,10 @@ char copyright[] = " SRE 0.9.9 Copyright (c) 1997-2001 by Secret Labs AB ";
  /* enables aggressive inlining (always on for Visual C) */
  #undef USE_INLINE
  
+#if PY_VERSION_HEX < 0x01060000
+#define PyObject_DEL(op) PyMem_DEL((op))
+#endif
+
  /* -------------------------------------------------------------------- */
  
  #if defined(_MSC_VER)
@@ -221,6 +227,23 @@ sre_category(SRE_CODE category, unsigned int ch)
          return SRE_UNI_IS_LINEBREAK(ch);
      case SRE_CATEGORY_UNI_NOT_LINEBREAK:
          return !SRE_UNI_IS_LINEBREAK(ch);
+#else
+    case SRE_CATEGORY_UNI_DIGIT:
+        return SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_NOT_DIGIT:
+        return !SRE_IS_DIGIT(ch);
+    case SRE_CATEGORY_UNI_SPACE:
+        return SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_NOT_SPACE:
+        return !SRE_IS_SPACE(ch);
+    case SRE_CATEGORY_UNI_WORD:
+        return SRE_LOC_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_NOT_WORD:
+        return !SRE_LOC_IS_WORD(ch);
+    case SRE_CATEGORY_UNI_LINEBREAK:
+        return SRE_IS_LINEBREAK(ch);
+    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
+        return !SRE_IS_LINEBREAK(ch);
  #endif
      }
      return 0;
@@ -1208,33 +1231,22 @@ _compile(PyObject* self_, PyObject* args)
      int groups = 0;
      PyObject* groupindex = NULL;
      PyObject* indexgroup = NULL;
-    if (!PyArg_ParseTuple(args, "OiO|iOO", &pattern, &flags, &code,
-                          &groups, &groupindex, &indexgroup))
+    if (!PyArg_ParseTuple(args, "OiO!|iOO", &pattern, &flags,
+                          &PyList_Type, &code, &groups,
+                          &groupindex, &indexgroup))
          return NULL;
  
-    code = PySequence_Fast(code, "code argument must be a sequence");
-    if (!code)
-        return NULL;
-
-#if PY_VERSION_HEX >= 0x01060000
-    n = PySequence_Size(code);
-#else
-    n = PySequence_Length(code);
-#endif
+    n = PyList_GET_SIZE(code);
  
      self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
-    if (!self) {
-        Py_DECREF(code);
+    if (!self)
          return NULL;
-    }
  
      for (i = 0; i < n; i++) {
-        PyObject *o = PySequence_Fast_GET_ITEM(code, i);
+        PyObject *o = PyList_GET_ITEM(code, i);
          self->code[i] = (SRE_CODE) PyInt_AsLong(o);
      }
  
-    Py_DECREF(code);
-
      if (PyErr_Occurred()) {
          PyObject_DEL(self);
          return NULL;
@@ -1270,9 +1282,11 @@ sre_getlower(PyObject* self, PyObject* args)
          return NULL;
      if (flags & SRE_FLAG_LOCALE)
          return Py_BuildValue("i", sre_lower_locale(character));
-#if defined(HAVE_UNICODE)
      if (flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
          return Py_BuildValue("i", sre_lower_unicode(character));
+#else
+        return Py_BuildValue("i", sre_lower_locale(character));
  #endif
      return Py_BuildValue("i", sre_lower(character));
  }
@@ -1380,9 +1394,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
  
      if (pattern->flags & SRE_FLAG_LOCALE)
          state->lower = sre_lower_locale;
-#if defined(HAVE_UNICODE)
      else if (pattern->flags & SRE_FLAG_UNICODE)
+#if defined(HAVE_UNICODE)
          state->lower = sre_lower_unicode;
+#else
+        state->lower = sre_lower_locale;
  #endif
      else
          state->lower = sre_lower;
@@ -1520,7 +1536,7 @@ pattern_scanner(PatternObject* pattern, PyObject* args)
  
      string = state_init(&self->state, pattern, string, start, end);
      if (!string) {
-        PyObject_Del(self);
+        PyObject_DEL(self);
          return NULL;
      }
  
@@ -1619,7 +1635,7 @@ call(char* function, PyObject* args)
      PyObject* func;
      PyObject* result;
  
-    name = PyString_FromString(MODULE);
+    name = PyString_FromString(SRE_MODULE);
      if (!name)
          return NULL;
      module = PyImport_Import(name);
@@ -2366,7 +2382,7 @@ init_sre(void)
      Pattern_Type.ob_type = Match_Type.ob_type =
          Scanner_Type.ob_type = &PyType_Type;
  
-    m = Py_InitModule("_" MODULE, _functions);
+    m = Py_InitModule("_" SRE_MODULE, _functions);
      d = PyModule_GetDict(m);
  
      PyDict_SetItemString(
author	Fredrik Lundh <fredrik@pythonware.com>
	Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)
committer	Fredrik Lundh <fredrik@pythonware.com>
	Tue, 16 Jan 2001 07:37:30 +0000 (07:37 +0000)
Lib/sre.py		patch \| blob \| history
Lib/sre_parse.py		patch \| blob \| history
Lib/test/test_sre.py		patch \| blob \| history
Modules/_sre.c		patch \| blob \| history