granicus.if.org Git - python/commitdiff
Jeffrey's latests
author Guido van Rossum <guido@python.org>
Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
committer Guido van Rossum <guido@python.org>
Thu, 17 Jul 1997 22:41:38 +0000 (22:41 +0000)
Modules/regexpr.c
Modules/regexpr.h
Modules/reopmodule.c

index 6b6ccbefb7557f28131f6fbd223e2b1d03c994df..2d30171f421a511a473eb5e8c78ad85d3f7c7656 100644 (file)
@@ -1,7 +1,3 @@
-/*
- * -*- mode: c-mode; c-file-style: python -*-
- */
-
 /* regexpr.c
  *
  * Author: Tatu Ylonen <ylo@ngs.fi>
@@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
 #define MAX_NESTING 100  /* max nesting level of operators */
 
 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-#define Sword 1
 
-static char re_syntax_table[256];
+char re_syntax_table[256];
 
-static void re_compile_initialize(void)
+void re_compile_initialize(void)
 {
        int a;
   
        static int syntax_table_inited = 0;
-       
+
        if (!syntax_table_inited)
        {
                syntax_table_inited = 1;
@@ -491,7 +486,11 @@ static void re_compile_initialize(void)
                for (a = 'A'; a <= 'Z'; a++)
                        re_syntax_table[a] = Sword;
                for (a = '0'; a <= '9'; a++)
-                       re_syntax_table[a] = Sword;
+                       re_syntax_table[a] = Sword | Sdigit;
+               re_syntax_table['_'] = Sword;
+               for (a = 9; a <= 13; a++)
+                       re_syntax_table[a] = Swhitespace;
+               re_syntax_table[' '] = Swhitespace;
        }
        re_compile_initialized = 1;
        for (a = 0; a < 256; a++)
@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
                return;  /* we have already been here */
        visited[pos] = 1;
        for (;;)
-               switch (code[pos++])
-               {
+               switch (code[pos++]) {
                case Cend:
-               {
-                       *can_be_null = 1;
-                       return;
-               }
+                       {
+                               *can_be_null = 1;
+                               return;
+                       }
                case Cbol:
                case Cbegbuf:
                case Cendbuf:
@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
   
        NEW_STATE(state, bufp->num_registers);
 
-       if (!re_compile_initialized)
-               re_compile_initialize();
-  
   continue_matching:
        switch (*code++)
        {
@@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
        {
                if (text == textend)
                        goto fail;
-               if (SYNTAX(*text) != Sword)
+               if (SYNTAX(*text) & Sword)
                        goto fail;
                if (text == textstart)
                        goto continue_matching;
-               if (SYNTAX(text[-1]) != Sword)
+               if (!(SYNTAX(text[-1]) & Sword))
                        goto continue_matching;
                goto fail;
        }
@@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
        {
                if (text == textstart)
                        goto fail;
-               if (SYNTAX(text[-1]) != Sword)
+               if (!(SYNTAX(text[-1]) & Sword))
                        goto fail;
                if (text == textend)
                        goto continue_matching;
-               if (SYNTAX(*text) == Sword)
+               if (SYNTAX(*text) & Sword)
                        goto fail;
                goto continue_matching;
        }
@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
 
                if (text == textstart || text == textend)
                        goto continue_matching;
-               if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+               if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
                        goto continue_matching;
                goto fail;
        }
@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
                 * beginning and end of buffer.  */
                if (text == textstart || text == textend)
                        goto fail;
-               if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
+               if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
                        goto fail;
                goto continue_matching;
        }
        case Csyntaxspec:
        {
                NEXTCHAR(ch);
-               if (SYNTAX(ch) != (unsigned char)*code++)
+               if (!(SYNTAX(ch) & (unsigned char)*code++))
                        goto fail;
                goto continue_matching;
        }
        case Cnotsyntaxspec:
        {
                NEXTCHAR(ch);
-               if (SYNTAX(ch) != (unsigned char)*code++)
+               if (SYNTAX(ch) & (unsigned char)*code++)
                        break;
                goto continue_matching;
        }
@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
        }
        return -1;
 }
+
+/*
+** Local Variables:
+** mode: c
+** c-file-style: "python"
+** End:
+*/
index 122180276c68f13fd522a326b3e56ba43e659335..91f00b9504ca039e19f909d3a69ff8c214ecb57d 100644 (file)
@@ -67,10 +67,16 @@ typedef struct re_registers
 #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
 #define RE_SYNTAX_EMACS        0
 
+#define Sword       1
+#define Swhitespace 2
+#define Sdigit      4
+
 /* Rename all exported symbols to avoid conflicts with similarly named
    symbols in some systems' standard C libraries... */
 
 #define re_syntax _Py_re_syntax
+#define re_syntax_table _Py_re_syntax_table
+#define re_compile_initialize _Py_re_compile_initialize
 #define re_set_syntax _Py_re_set_syntax
 #define re_compile_pattern _Py_re_compile_pattern
 #define re_match _Py_re_match
@@ -85,6 +91,10 @@ extern int re_syntax;
 /* This is the actual syntax mask.  It was added so that Python could do
  * syntax-dependent munging of patterns before compilation. */
 
+extern char re_syntax_table[256];
+
+void re_compile_initialize(void);
+
 int re_set_syntax(int syntax);
 /* This sets the syntax to use and returns the previous syntax.  The
  * syntax is specified by a bit mask of the above defined bits. */
@@ -133,6 +143,8 @@ int re_exec(char *s);
 #else /* HAVE_PROTOTYPES */
 
 extern int re_syntax;
+extern char re_syntax_table[256];
+void re_compile_initialize();
 int re_set_syntax();
 char *re_compile_pattern();
 int re_match();
index 9b928f5beffd09ade80e487fbfd40cc3cf7739f6..0817626d473f969c44c69d8304d80a80b9f5352a 100644 (file)
@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
 
 static PyObject *ReopError;    /* Exception */ 
 
+#define IGNORECASE 0x01
+#define MULTILINE  0x02
+#define DOTALL     0x04
+#define VERBOSE    0x08
+
+static char *reop_casefold;
+
 static PyObject *
 makeresult(regs, num_regs)
        struct re_registers *regs;
@@ -90,6 +97,10 @@ reop_match(self, args)
        int flags, pos, result;
        struct re_pattern_buffer bufp;
        struct re_registers re_regs;
+       PyObject *modules = NULL;
+       PyObject *reopmodule = NULL;
+       PyObject *reopdict = NULL;
+       PyObject *casefold = NULL;
        
        if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                         &(bufp.buffer), &(bufp.allocated), 
@@ -102,20 +113,44 @@ reop_match(self, args)
 
        /* XXX sanity-check the input data */
        bufp.used=bufp.allocated;
-       bufp.translate=NULL;
+       if (flags & IGNORECASE)
+       {
+               if ((modules = PyImport_GetModuleDict()) == NULL)
+                       return NULL;
+
+               if ((reopmodule = PyDict_GetItemString(modules,
+                                                      "reop")) == NULL)
+                       return NULL;
+
+               if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+                       return NULL;
+
+               if ((casefold = PyDict_GetItemString(reopdict,
+                                                    "casefold")) == NULL)
+                       return NULL;
+
+               bufp.translate = PyString_AsString(casefold);
+       }
+       else
+               bufp.translate=NULL;
        bufp.fastmap_accurate=1;
        bufp.can_be_null=can_be_null;
        bufp.uses_registers=1;
        bufp.anchor=anchor;
        
-       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i=0; i<bufp.num_registers; i++) {
+               re_regs.start[i]=-1;
+               re_regs.end[i]=-1;
+       }
        
        result = re_match(&bufp, 
                          string, stringlen, pos, 
                          &re_regs);
+
        if (result < -1) {
                /* Failure like stack overflow */
                PyErr_SetString(ReopError, "match failure");
+               
                return NULL;
        }
        if (result == -1) {
@@ -136,6 +171,10 @@ reop_search(self, args)
        int flags, pos, result;
        struct re_pattern_buffer bufp;
        struct re_registers re_regs;
+       PyObject *modules = NULL;
+       PyObject *reopmodule = NULL;
+       PyObject *reopdict = NULL;
+       PyObject *casefold = NULL;
        
        if (!PyArg_Parse(args, "(s#iiis#is#i)", 
                         &(bufp.buffer), &(bufp.allocated), 
@@ -148,26 +187,51 @@ reop_search(self, args)
 
        /* XXX sanity-check the input data */
        bufp.used=bufp.allocated;
-       bufp.translate=NULL;
+       if (flags & IGNORECASE)
+       {
+               if ((modules = PyImport_GetModuleDict()) == NULL)
+                       return NULL;
+
+               if ((reopmodule = PyDict_GetItemString(modules,
+                                                      "reop")) == NULL)
+                       return NULL;
+
+               if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+                       return NULL;
+
+               if ((casefold = PyDict_GetItemString(reopdict,
+                                                    "casefold")) == NULL)
+                       return NULL;
+
+               bufp.translate = PyString_AsString(casefold);
+       }
+       else
+               bufp.translate=NULL;
        bufp.fastmap_accurate=1;
        bufp.can_be_null=can_be_null;
        bufp.uses_registers=1;
        bufp.anchor=anchor;
 
-       for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+       for(i = 0; i < bufp.num_registers; i++) {
+               re_regs.start[i] = -1;
+               re_regs.end[i] = -1;
+       }
        
        result = re_search(&bufp, 
                           string, stringlen, pos, stringlen-pos,
                           &re_regs);
+
        if (result < -1) {
                /* Failure like stack overflow */
                PyErr_SetString(ReopError, "match failure");
                return NULL;
        }
+
        if (result == -1) {
                Py_INCREF(Py_None);
                return Py_None;
        }
+
        return makeresult(&re_regs, bufp.num_registers);
 }
 
@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
 void
 initreop()
 {
-       PyObject *m, *d, *v;
+       PyObject *m, *d, *k, *v, *o;
        int i;
        char *s;
-       
+       char j[2];
+
+       re_compile_initialize();
+
        m = Py_InitModule("reop", reop_global_methods);
        d = PyModule_GetDict(m);
        
@@ -370,12 +437,64 @@ initreop()
                else
                        s[i] = i;
        }
+
        if (PyDict_SetItemString(d, "casefold", v) < 0)
                goto finally;
        Py_DECREF(v);
 
+       /* Initialize the syntax table */
+
+       o = PyDict_New();
+       if (o == NULL)
+          goto finally;
+
+       j[1] = '\0';
+       for (i = 0; i < 256; i++)
+       {
+          j[0] = i;
+          k = PyString_FromStringAndSize(j, 1);
+          if (k == NULL)
+             goto finally;
+          v = PyInt_FromLong(re_syntax_table[i]);
+          if (v == NULL)
+             goto finally;
+          if (PyDict_SetItem(o, k, v) < 0)
+             goto finally;
+          Py_DECREF(k);
+          Py_DECREF(v);
+       }
+
+       if (PyDict_SetItemString(d, "syntax_table", o) < 0)
+          goto finally;
+       Py_DECREF(o);
+
+       v = PyInt_FromLong(Sword);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "word", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+
+       v = PyInt_FromLong(Swhitespace);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "whitespace", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+
+       v = PyInt_FromLong(Sdigit);
+       if (v == NULL)
+          goto finally;
+
+       if (PyDict_SetItemString(d, "digit", v) < 0)
+          goto finally;
+       Py_DECREF(v);
+       
        if (!PyErr_Occurred())
                return;
+
   finally:
        Py_FatalError("can't initialize reop module");
 }