-/*
- * -*- mode: c-mode; c-file-style: python -*-
- */
-
/* regexpr.c
*
* Author: Tatu Ylonen <ylo@ngs.fi>
#define MAX_NESTING 100 /* max nesting level of operators */
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
-#define Sword 1
-static char re_syntax_table[256];
+char re_syntax_table[256];
-static void re_compile_initialize(void)
+void re_compile_initialize(void)
{
int a;
static int syntax_table_inited = 0;
-
+
if (!syntax_table_inited)
{
syntax_table_inited = 1;
for (a = 'A'; a <= 'Z'; a++)
re_syntax_table[a] = Sword;
for (a = '0'; a <= '9'; a++)
- re_syntax_table[a] = Sword;
+ re_syntax_table[a] = Sword | Sdigit;
+ re_syntax_table['_'] = Sword;
+ for (a = 9; a <= 13; a++)
+ re_syntax_table[a] = Swhitespace;
+ re_syntax_table[' '] = Swhitespace;
}
re_compile_initialized = 1;
for (a = 0; a < 256; a++)
return; /* we have already been here */
visited[pos] = 1;
for (;;)
- switch (code[pos++])
- {
+ switch (code[pos++]) {
case Cend:
- {
- *can_be_null = 1;
- return;
- }
+ {
+ *can_be_null = 1;
+ return;
+ }
case Cbol:
case Cbegbuf:
case Cendbuf:
NEW_STATE(state, bufp->num_registers);
- if (!re_compile_initialized)
- re_compile_initialize();
-
continue_matching:
switch (*code++)
{
{
if (text == textend)
goto fail;
- if (SYNTAX(*text) != Sword)
+ if (!(SYNTAX(*text) & Sword)) /* word start: the current char must carry the Sword bit; the negation keeps the meaning of the old "!= Sword" test */
goto fail;
if (text == textstart)
goto continue_matching;
- if (SYNTAX(text[-1]) != Sword)
+ if (!(SYNTAX(text[-1]) & Sword)) /* ...and the preceding char must not be a word char */
goto continue_matching;
goto fail;
}
{
if (text == textstart)
goto fail;
- if (SYNTAX(text[-1]) != Sword)
+ if (!(SYNTAX(text[-1]) & Sword))
goto fail;
if (text == textend)
goto continue_matching;
- if (SYNTAX(*text) == Sword)
+ if (SYNTAX(*text) & Sword)
goto fail;
goto continue_matching;
}
if (text == textstart || text == textend)
goto continue_matching;
- if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))
+ if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
goto continue_matching;
goto fail;
}
* beginning and end of buffer. */
if (text == textstart || text == textend)
goto fail;
- if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)))
+ if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
goto fail;
goto continue_matching;
}
case Csyntaxspec:
{
NEXTCHAR(ch);
- if (SYNTAX(ch) != (unsigned char)*code++)
+ if (!(SYNTAX(ch) & (unsigned char)*code++))
goto fail;
goto continue_matching;
}
case Cnotsyntaxspec:
{
NEXTCHAR(ch);
- if (SYNTAX(ch) != (unsigned char)*code++)
+ if (SYNTAX(ch) & (unsigned char)*code++)
break;
goto continue_matching;
}
}
return -1;
}
+
+/*
+** Local Variables:
+** mode: c
+** c-file-style: "python"
+** End:
+*/
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
+#define Sword 1 /* re_syntax_table flag bit: word character */
+#define Swhitespace 2 /* re_syntax_table flag bit: whitespace character */
+#define Sdigit 4 /* re_syntax_table flag bit: digit; OR-ed together with Sword for '0'..'9' */
+
/* Rename all exported symbols to avoid conflicts with similarly named
symbols in some systems' standard C libraries... */
#define re_syntax _Py_re_syntax
+#define re_syntax_table _Py_re_syntax_table
+#define re_compile_initialize _Py_re_compile_initialize
#define re_set_syntax _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern
#define re_match _Py_re_match
/* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */
+extern char re_syntax_table[256];
+
+void re_compile_initialize(void);
+
int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */
#else /* HAVE_PROTOTYPES */
extern int re_syntax;
+extern char re_syntax_table[256];
+void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
static PyObject *ReopError; /* Exception */
+#define IGNORECASE 0x01
+#define MULTILINE 0x02
+#define DOTALL 0x04
+#define VERBOSE 0x08
+
+static char *reop_casefold;
+
static PyObject *
makeresult(regs, num_regs)
struct re_registers *regs;
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
+ PyObject *modules = NULL;
+ PyObject *reopmodule = NULL;
+ PyObject *reopdict = NULL;
+ PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
- bufp.translate=NULL;
+	if (flags & IGNORECASE)
+	{
+		/* Case-insensitive matching: use the translation table kept
+		 * in the reop module dict under "casefold".  The lookups
+		 * below yield borrowed references, so nothing is DECREF'd. */
+		if ((modules = PyImport_GetModuleDict()) == NULL)
+			return NULL;
+
+		/* PyDict_GetItemString() reports a missing key by returning
+		 * NULL *without* setting an exception, so set one here
+		 * before propagating the failure. */
+		if ((reopmodule = PyDict_GetItemString(modules,
+						       "reop")) == NULL) {
+			PyErr_SetString(ReopError,
+					"reop module not found in sys.modules");
+			return NULL;
+		}
+
+		if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+			return NULL;
+
+		if ((casefold = PyDict_GetItemString(reopdict,
+						     "casefold")) == NULL) {
+			PyErr_SetString(ReopError,
+					"reop.casefold is missing");
+			return NULL;
+		}
+
+		/* PyString_AsString() returns NULL (with an exception set)
+		 * when casefold is not a string object.
+		 * NOTE(review): the length of casefold is not checked;
+		 * confirm the matcher cannot index past a shorter string. */
+		if ((bufp.translate = PyString_AsString(casefold)) == NULL)
+			return NULL;
+	}
+	else
+		bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
- for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+ for(i=0; i<bufp.num_registers; i++) {
+ re_regs.start[i]=-1;
+ re_regs.end[i]=-1;
+ }
result = re_match(&bufp,
string, stringlen, pos,
&re_regs);
+
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
+
return NULL;
}
if (result == -1) {
int flags, pos, result;
struct re_pattern_buffer bufp;
struct re_registers re_regs;
+ PyObject *modules = NULL;
+ PyObject *reopmodule = NULL;
+ PyObject *reopdict = NULL;
+ PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated),
/* XXX sanity-check the input data */
bufp.used=bufp.allocated;
- bufp.translate=NULL;
+	if (flags & IGNORECASE)
+	{
+		/* Case-insensitive search: use the translation table kept
+		 * in the reop module dict under "casefold".  The lookups
+		 * below yield borrowed references, so nothing is DECREF'd. */
+		if ((modules = PyImport_GetModuleDict()) == NULL)
+			return NULL;
+
+		/* PyDict_GetItemString() reports a missing key by returning
+		 * NULL *without* setting an exception, so set one here
+		 * before propagating the failure. */
+		if ((reopmodule = PyDict_GetItemString(modules,
+						       "reop")) == NULL) {
+			PyErr_SetString(ReopError,
+					"reop module not found in sys.modules");
+			return NULL;
+		}
+
+		if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
+			return NULL;
+
+		if ((casefold = PyDict_GetItemString(reopdict,
+						     "casefold")) == NULL) {
+			PyErr_SetString(ReopError,
+					"reop.casefold is missing");
+			return NULL;
+		}
+
+		/* PyString_AsString() returns NULL (with an exception set)
+		 * when casefold is not a string object.
+		 * NOTE(review): the length of casefold is not checked;
+		 * confirm the matcher cannot index past a shorter string. */
+		if ((bufp.translate = PyString_AsString(casefold)) == NULL)
+			return NULL;
+	}
+	else
+		bufp.translate=NULL;
bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null;
bufp.uses_registers=1;
bufp.anchor=anchor;
- for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;}
+ for(i = 0; i < bufp.num_registers; i++) {
+ re_regs.start[i] = -1;
+ re_regs.end[i] = -1;
+ }
result = re_search(&bufp,
string, stringlen, pos, stringlen-pos,
&re_regs);
+
if (result < -1) {
/* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure");
return NULL;
}
+
if (result == -1) {
Py_INCREF(Py_None);
return Py_None;
}
+
return makeresult(&re_regs, bufp.num_registers);
}
void
initreop()
{
- PyObject *m, *d, *v;
+ PyObject *m, *d, *k, *v, *o;
int i;
char *s;
-
+ char j[2];
+
+ re_compile_initialize();
+
m = Py_InitModule("reop", reop_global_methods);
d = PyModule_GetDict(m);
else
s[i] = i;
}
+
if (PyDict_SetItemString(d, "casefold", v) < 0)
goto finally;
Py_DECREF(v);
+ /* Export the syntax table to Python: build a dict that maps each one-character string to its re_syntax_table flag value and store it in the module dict as "syntax_table". Any failure jumps to finally, which calls Py_FatalError(). */
+
+ o = PyDict_New();
+ if (o == NULL)
+ goto finally;
+
+ j[1] = '\0'; /* j is a two-byte buffer; only j[0] is used as the key below */
+ for (i = 0; i < 256; i++)
+ {
+ j[0] = i;
+ k = PyString_FromStringAndSize(j, 1); /* key: one-character string for byte value i */
+ if (k == NULL)
+ goto finally;
+ v = PyInt_FromLong(re_syntax_table[i]); /* value: flag bits stored for that byte */
+ if (v == NULL)
+ goto finally;
+ if (PyDict_SetItem(o, k, v) < 0)
+ goto finally;
+ Py_DECREF(k); /* release this function's references to key and value */
+ Py_DECREF(v);
+ }
+
+ if (PyDict_SetItemString(d, "syntax_table", o) < 0)
+ goto finally;
+ Py_DECREF(o);
+
+ v = PyInt_FromLong(Sword); /* publish the Sword flag value as "word" */
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "word", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
+ v = PyInt_FromLong(Swhitespace); /* publish the Swhitespace flag value as "whitespace" */
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "whitespace", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
+ v = PyInt_FromLong(Sdigit); /* publish the Sdigit flag value as "digit" */
+ if (v == NULL)
+ goto finally;
+
+ if (PyDict_SetItemString(d, "digit", v) < 0)
+ goto finally;
+ Py_DECREF(v);
+
if (!PyErr_Occurred())
return;
+
finally:
Py_FatalError("can't initialize reop module");
}