Jeffrey's latests

author Guido van Rossum <guido@python.org>

Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)

committer Guido van Rossum <guido@python.org>

Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
author Guido van Rossum <guido@python.org>
Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
committer Guido van Rossum <guido@python.org>
Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
diff --git a/Modules/regexpr.c b/Modules/regexpr.c

index 6b6ccbefb7557f28131f6fbd223e2b1d03c994df..2d30171f421a511a473eb5e8c78ad85d3f7c7656 100644 (file)
--- a/Modules/regexpr.c
+++ b/Modules/regexpr.c
@@ -1,7 +1,3 @@
-/*
- * -*- mode: c-mode; c-file-style: python -*-
- */
-
  /* regexpr.c
   *
   * Author: Tatu Ylonen <ylo@ngs.fi>
@@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
  #define MAX_NESTING 100  /* max nesting level of operators */
  
  #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-#define Sword 1
  
-static char re_syntax_table[256];
+char re_syntax_table[256];
  
-static void re_compile_initialize(void)
+void re_compile_initialize(void)
  {
         int a;
    
         static int syntax_table_inited = 0;
-       
+
         if (!syntax_table_inited)
         {
                 syntax_table_inited = 1;
@@ -491,7 +486,11 @@ static void re_compile_initialize(void)
                 for (a = 'A'; a <= 'Z'; a++)
                         re_syntax_table[a] = Sword;
                 for (a = '0'; a <= '9'; a++)
-                       re_syntax_table[a] = Sword;
+                       re_syntax_table[a] = Sword | Sdigit;
+               re_syntax_table['_'] = Sword;
+               for (a = 9; a <= 13; a++)
+                       re_syntax_table[a] = Swhitespace;
+               re_syntax_table[' '] = Swhitespace;
         }
         re_compile_initialized = 1;
         for (a = 0; a < 256; a++)
@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
                 return;  /* we have already been here */
         visited[pos] = 1;
         for (;;)
-               switch (code[pos++])
-               {
+               switch (code[pos++]) {
                 case Cend:
-               {
-                       *can_be_null = 1;
-                       return;
-               }
+                       {
+                               *can_be_null = 1;
+                               return;
+                       }
                 case Cbol:
                 case Cbegbuf:
                 case Cendbuf:
@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
    
         NEW_STATE(state, bufp->num_registers);
  
-       if (!re_compile_initialized)
-               re_compile_initialize();
-  
    continue_matching:
         switch (*code++)
         {
@@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
         {
                 if (text == textend)
                         goto fail;
-               if (SYNTAX(*text) != Sword)
+               if (SYNTAX(*text) & Sword)
                         goto fail;
                 if (text == textstart)
                         goto continue_matching;
-               if (SYNTAX(text[-1]) != Sword)
+               if (!(SYNTAX(text[-1]) & Sword))
                         goto continue_matching;
                 goto fail;
         }
@@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
         {
                 if (text == textstart)
                         goto fail;
-               if (SYNTAX(text[-1]) != Sword)
+               if (!(SYNTAX(text[-1]) & Sword))
                         goto fail;
                 if (text == textend)
                         goto continue_matching;
-               if (SYNTAX(*text) == Sword)
+               if (SYNTAX(*text) & Sword)
                         goto fail;
                 goto continue_matching;
         }
@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
  
                 if (text == textstart || text == textend)
                         goto continue_matching;
-               if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+               if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
                         goto continue_matching;
                 goto fail;
         }
@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
                  * beginning and end of buffer.  */
                 if (text == textstart || text == textend)
                         goto fail;
-               if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
+               if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
                         goto fail;
                 goto continue_matching;
         }
         case Csyntaxspec:
         {
                 NEXTCHAR(ch);
-               if (SYNTAX(ch) != (unsigned char)*code++)
+               if (!(SYNTAX(ch) & (unsigned char)*code++))
                         goto fail;
                 goto continue_matching;
         }
         case Cnotsyntaxspec:
         {
                 NEXTCHAR(ch);
-               if (SYNTAX(ch) != (unsigned char)*code++)
+               if (SYNTAX(ch) & (unsigned char)*code++)
                         break;
                 goto continue_matching;
         }
@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
         }
         return -1;
  }
+
+/*
+** Local Variables:
+** mode: c
+** c-file-style: "python"
+** End:
+*/
diff --git a/Modules/regexpr.h b/Modules/regexpr.h

index 122180276c68f13fd522a326b3e56ba43e659335..91f00b9504ca039e19f909d3a69ff8c214ecb57d 100644 (file)
--- a/Modules/regexpr.h
+++ b/Modules/regexpr.h
@@ -67,10 +67,16 @@ typedef struct re_registers
  #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
  #define RE_SYNTAX_EMACS        0
  
+#define Sword       1
+#define Swhitespace 2
+#define Sdigit      4
+
  /* Rename all exported symbols to avoid conflicts with similarly named
     symbols in some systems' standard C libraries... */
  
  #define re_syntax _Py_re_syntax
+#define re_syntax_table _Py_re_syntax_table
+#define re_compile_initialize _Py_re_compile_initialize
  #define re_set_syntax _Py_re_set_syntax
  #define re_compile_pattern _Py_re_compile_pattern
  #define re_match _Py_re_match
@@ -85,6 +91,10 @@ extern int re_syntax;
  /* This is the actual syntax mask.  It was added so that Python could do
   * syntax-dependent munging of patterns before compilation. */
  
+extern char re_syntax_table[256];
+
+void re_compile_initialize(void);
+
  int re_set_syntax(int syntax);
  /* This sets the syntax to use and returns the previous syntax.  The
   * syntax is specified by a bit mask of the above defined bits. */
@@ -133,6 +143,8 @@ int re_exec(char *s);
  #else /* HAVE_PROTOTYPES */
  
  extern int re_syntax;
+extern char re_syntax_table[256];
+void re_compile_initialize();
  int re_set_syntax();
  char *re_compile_pattern();
  int re_match();
diff --git a/Modules/reopmodule.c b/Modules/reopmodule.c

index 9b928f5beffd09ade80e487fbfd40cc3cf7739f6..0817626d473f969c44c69d8304d80a80b9f5352a 100644 (file)
--- a/Modules/reopmodule.c
+++ b/Modules/reopmodule.c
@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
  
  static PyObject *ReopError;    /* Exception */ 
  
+#define IGNORECASE 0x01
+#define MULTILINE  0x02
+#define DOTALL     0x04
+#define VERBOSE    0x08
+
+static char *reop_casefold;
+
  static PyObject *
  makeresult(regs, num_regs)
         struct re_registers *regs;
@@ -90,6 +97,10 @@ reop_match(self, args)
         int flags, pos, result;
         struct re_pattern_buffer bufp;
         struct re_registers re_regs;
+       PyObject *modules = NULL;
+       PyObject *reopmodule = NULL;
+       PyObject *reopdict = NULL;
+       PyObject *casefold = NULL;
         
         if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                          &(bufp.buffer), &(bufp.allocated), 
@@ -102,20 +113,44 @@ reop_match(self, args)
  
         /* XXX sanity-check the input data */
         bufp.used=bufp.allocated;
-       bufp.translate=NULL;
+       if (flags & IGNORECASE)
+       {
+               if ((modules = PyImport_GetModuleDict()) == NULL)
+                       return NULL;
+
+               if ((reopmodule = PyDict_GetItemString(modules,
+                                                      "reop")) == NULL)
+                       return NULL;
+
+               if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+                       return NULL;
+
+               if ((casefold = PyDict_GetItemString(reopdict,
+                                                    "casefold")) == NULL)
+                       return NULL;
+
+               bufp.translate = PyString_AsString(casefold);
+       }
+       else
+               bufp.translate=NULL;
         bufp.fastmap_accurate=1;
         bufp.can_be_null=can_be_null;
         bufp.uses_registers=1;
         bufp.anchor=anchor;
         
-       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i=0; i<bufp.num_registers; i++) {
+               re_regs.start[i]=-1;
+               re_regs.end[i]=-1;
+       }
         
         result = re_match(&bufp, 
                           string, stringlen, pos, 
                           &re_regs);
+
         if (result < -1) {
                 /* Failure like stack overflow */
                 PyErr_SetString(ReopError, "match failure");
+               
                 return NULL;
         }
         if (result == -1) {
@@ -136,6 +171,10 @@ reop_search(self, args)
         int flags, pos, result;
         struct re_pattern_buffer bufp;
         struct re_registers re_regs;
+       PyObject *modules = NULL;
+       PyObject *reopmodule = NULL;
+       PyObject *reopdict = NULL;
+       PyObject *casefold = NULL;
         
         if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                          &(bufp.buffer), &(bufp.allocated), 
@@ -148,26 +187,51 @@ reop_search(self, args)
  
         /* XXX sanity-check the input data */
         bufp.used=bufp.allocated;
-       bufp.translate=NULL;
+       if (flags & IGNORECASE)
+       {
+               if ((modules = PyImport_GetModuleDict()) == NULL)
+                       return NULL;
+
+               if ((reopmodule = PyDict_GetItemString(modules,
+                                                      "reop")) == NULL)
+                       return NULL;
+
+               if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+                       return NULL;
+
+               if ((casefold = PyDict_GetItemString(reopdict,
+                                                    "casefold")) == NULL)
+                       return NULL;
+
+               bufp.translate = PyString_AsString(casefold);
+       }
+       else
+               bufp.translate=NULL;
         bufp.fastmap_accurate=1;
         bufp.can_be_null=can_be_null;
         bufp.uses_registers=1;
         bufp.anchor=anchor;
  
-       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i = 0; i < bufp.num_registers; i++) {
+               re_regs.start[i] = -1;
+               re_regs.end[i] = -1;
+       }
         
         result = re_search(&bufp, 
                            string, stringlen, pos, stringlen-pos,
                            &re_regs);
+
         if (result < -1) {
                 /* Failure like stack overflow */
                 PyErr_SetString(ReopError, "match failure");
                 return NULL;
         }
+
         if (result == -1) {
                 Py_INCREF(Py_None);
                 return Py_None;
         }
+
         return makeresult(&re_regs, bufp.num_registers);
  }
  
@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
  void
  initreop()
  {
-       PyObject *m, *d, *v;
+       PyObject *m, *d, *k, *v, *o;
         int i;
         char *s;
-       
+       char j[2];
+
+       re_compile_initialize();
+
         m = Py_InitModule("reop", reop_global_methods);
         d = PyModule_GetDict(m);
         
@@ -370,12 +437,64 @@ initreop()
                 else
                         s[i] = i;
         }
+
         if (PyDict_SetItemString(d, "casefold", v) < 0)
                 goto finally;
         Py_DECREF(v);
  
+       /* Initialize the syntax table */
+
+       o = PyDict_New();
+       if (o == NULL)
+          goto finally;
+
+       j[1] = '\0';
+       for (i = 0; i < 256; i++)
+       {
+          j[0] = i;
+          k = PyString_FromStringAndSize(j, 1);
+          if (k == NULL)
+             goto finally;
+          v = PyInt_FromLong(re_syntax_table[i]);
+          if (v == NULL)
+             goto finally;
+          if (PyDict_SetItem(o, k, v) < 0)
+             goto finally;
+          Py_DECREF(k);
+          Py_DECREF(v);
+       }
+
+       if (PyDict_SetItemString(d, "syntax_table", o) < 0)
+          goto finally;
+       Py_DECREF(o);
+
+       v = PyInt_FromLong(Sword);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "word", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+
+       v = PyInt_FromLong(Swhitespace);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "whitespace", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+
+       v = PyInt_FromLong(Sdigit);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "digit", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+       
         if (!PyErr_Occurred())
                 return;
+
    finally:
         Py_FatalError("can't initialize reop module");
  }
author	Guido van Rossum <guido@python.org>
	Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
committer	Guido van Rossum <guido@python.org>
	Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
Modules/regexpr.c		patch \| blob \| history
Modules/regexpr.h		patch \| blob \| history
Modules/reopmodule.c		patch \| blob \| history