char *chars;
char *multis;
} cclasses[] = {
- { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789", "" },
- { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- "" },
- { "blank", " \t", "" },
- { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", "" },
- { "digit", "0123456789", "" },
- { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", "",
+ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ "",
+ "blank", " \t", "",
+ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", "",
+ "digit", "0123456789", "",
+ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- "" },
- { "lower", "abcdefghijklmnopqrstuvwxyz",
- "" },
- { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+ "",
+ "lower", "abcdefghijklmnopqrstuvwxyz",
+ "",
+ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- "" },
- { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- "" },
- { "space", "\t\n\v\f\r ", "" },
- { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- "" },
- { "xdigit", "0123456789ABCDEFabcdef",
- "" },
- { NULL, 0, "" }
+ "",
+ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ "",
+ "space", "\t\n\v\f\r ", "",
+ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ "",
+ "xdigit", "0123456789ABCDEFabcdef",
+ "",
+ NULL, 0, ""
};
char *name;
char code;
} cnames[] = {
- { "NUL", '\0' },
- { "SOH", '\001' },
- { "STX", '\002' },
- { "ETX", '\003' },
- { "EOT", '\004' },
- { "ENQ", '\005' },
- { "ACK", '\006' },
- { "BEL", '\007' },
- { "alert", '\007' },
- { "BS", '\010' },
- { "backspace", '\b' },
- { "HT", '\011' },
- { "tab", '\t' },
- { "LF", '\012' },
- { "newline", '\n' },
- { "VT", '\013' },
- { "vertical-tab", '\v' },
- { "FF", '\014' },
- { "form-feed", '\f' },
- { "CR", '\015' },
- { "carriage-return", '\r' },
- { "SO", '\016' },
- { "SI", '\017' },
- { "DLE", '\020' },
- { "DC1", '\021' },
- { "DC2", '\022' },
- { "DC3", '\023' },
- { "DC4", '\024' },
- { "NAK", '\025' },
- { "SYN", '\026' },
- { "ETB", '\027' },
- { "CAN", '\030' },
- { "EM", '\031' },
- { "SUB", '\032' },
- { "ESC", '\033' },
- { "IS4", '\034' },
- { "FS", '\034' },
- { "IS3", '\035' },
- { "GS", '\035' },
- { "IS2", '\036' },
- { "RS", '\036' },
- { "IS1", '\037' },
- { "US", '\037' },
- { "space", ' ' },
- { "exclamation-mark", '!' },
- { "quotation-mark", '"' },
- { "number-sign", '#' },
- { "dollar-sign", '$' },
- { "percent-sign", '%' },
- { "ampersand", '&' },
- { "apostrophe", '\'' },
- { "left-parenthesis", '(' },
- { "right-parenthesis", ')' },
- { "asterisk", '*' },
- { "plus-sign", '+' },
- { "comma", ',' },
- { "hyphen", '-' },
- { "hyphen-minus", '-' },
- { "period", '.' },
- { "full-stop", '.' },
- { "slash", '/' },
- { "solidus", '/' },
- { "zero", '0' },
- { "one", '1' },
- { "two", '2' },
- { "three", '3' },
- { "four", '4' },
- { "five", '5' },
- { "six", '6' },
- { "seven", '7' },
- { "eight", '8' },
- { "nine", '9' },
- { "colon", ':' },
- { "semicolon", ';' },
- { "less-than-sign", '<' },
- { "equals-sign", '=' },
- { "greater-than-sign", '>' },
- { "question-mark", '?' },
- { "commercial-at", '@' },
- { "left-square-bracket", '[' },
- { "backslash", '\\' },
- { "reverse-solidus", '\\' },
- { "right-square-bracket", ']' },
- { "circumflex", '^' },
- { "circumflex-accent", '^' },
- { "underscore", '_' },
- { "low-line", '_' },
- { "grave-accent", '`' },
- { "left-brace", '{' },
- { "left-curly-bracket", '{' },
- { "vertical-line", '|' },
- { "right-brace", '}' },
- { "right-curly-bracket", '}' },
- { "tilde", '~' },
- { "DEL", '\177' },
- { NULL, 0 }
+ "NUL", '\0',
+ "SOH", '\001',
+ "STX", '\002',
+ "ETX", '\003',
+ "EOT", '\004',
+ "ENQ", '\005',
+ "ACK", '\006',
+ "BEL", '\007',
+ "alert", '\007',
+ "BS", '\010',
+ "backspace", '\b',
+ "HT", '\011',
+ "tab", '\t',
+ "LF", '\012',
+ "newline", '\n',
+ "VT", '\013',
+ "vertical-tab", '\v',
+ "FF", '\014',
+ "form-feed", '\f',
+ "CR", '\015',
+ "carriage-return", '\r',
+ "SO", '\016',
+ "SI", '\017',
+ "DLE", '\020',
+ "DC1", '\021',
+ "DC2", '\022',
+ "DC3", '\023',
+ "DC4", '\024',
+ "NAK", '\025',
+ "SYN", '\026',
+ "ETB", '\027',
+ "CAN", '\030',
+ "EM", '\031',
+ "SUB", '\032',
+ "ESC", '\033',
+ "IS4", '\034',
+ "FS", '\034',
+ "IS3", '\035',
+ "GS", '\035',
+ "IS2", '\036',
+ "RS", '\036',
+ "IS1", '\037',
+ "US", '\037',
+ "space", ' ',
+ "exclamation-mark", '!',
+ "quotation-mark", '"',
+ "number-sign", '#',
+ "dollar-sign", '$',
+ "percent-sign", '%',
+ "ampersand", '&',
+ "apostrophe", '\'',
+ "left-parenthesis", '(',
+ "right-parenthesis", ')',
+ "asterisk", '*',
+ "plus-sign", '+',
+ "comma", ',',
+ "hyphen", '-',
+ "hyphen-minus", '-',
+ "period", '.',
+ "full-stop", '.',
+ "slash", '/',
+ "solidus", '/',
+ "zero", '0',
+ "one", '1',
+ "two", '2',
+ "three", '3',
+ "four", '4',
+ "five", '5',
+ "six", '6',
+ "seven", '7',
+ "eight", '8',
+ "nine", '9',
+ "colon", ':',
+ "semicolon", ';',
+ "less-than-sign", '<',
+ "equals-sign", '=',
+ "greater-than-sign", '>',
+ "question-mark", '?',
+ "commercial-at", '@',
+ "left-square-bracket", '[',
+ "backslash", '\\',
+ "reverse-solidus", '\\',
+ "right-square-bracket", ']',
+ "circumflex", '^',
+ "circumflex-accent", '^',
+ "underscore", '_',
+ "low-line", '_',
+ "grave-accent", '`',
+ "left-brace", '{',
+ "left-curly-bracket", '{',
+ "vertical-line", '|',
+ "right-brace", '}',
+ "right-curly-bracket", '}',
+ "tilde", '~',
+ "DEL", '\177',
+ NULL, 0,
};
int eflags;
{
register char *endp;
- register unsigned int i;
+ register int i;
struct match mv;
register struct match *m = &mv;
register char *dp;
- register const sopno gf = g->firststate+1; /* +1 for OEND */
- register const sopno gl = g->laststate;
+ const register sopno gf = g->firststate+1; /* +1 for OEND */
+ const register sopno gl = g->laststate;
char *start;
char *stop;
/* "can't happen" */
assert(nope);
/* NOTREACHED */
- return( NULL );
+ return((char *)NULL); /* dummy */
}
/*
register sopno pc;
register onestate here; /* note, macros know this name */
register sopno look;
- register int i;
+ register long i;
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
#define MORE2() (p->next+1 < p->end)
#define SEE(c) (MORE() && PEEK() == (c))
#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
-#define EAT(c) ((SEE(c)) ? (NEXT1(), 1) : 0)
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
-#define NEXT1() (p->next++)
+#define NEXT() (p->next++)
#define NEXT2() (p->next += 2)
#define NEXTn(n) (p->next += (n))
#define GETNEXT() (*p->next++)
#define SETERROR(e) seterr(p, (e))
-#define REQUIRE(co, e) ((void)((co) || SETERROR(e)))
+#define REQUIRE(co, e) ((co) || SETERROR(e))
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
if (!( c == '*' || c == '+' || c == '?' ||
(c == '{' && MORE2() && isdigit(PEEK2())) ))
return; /* no repetition, we're done */
- NEXT1();
+ NEXT();
REQUIRE(!wascaret, REG_BADRPT);
switch (c) {
repeat(p, pos, count, count2);
if (!EAT('}')) { /* error heuristics */
while (MORE() && PEEK() != '}')
- NEXT1();
+ NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
REQUIRE(starordinary, REG_BADRPT);
/* FALLTHROUGH */
default:
- ordinary(p, c &~ BACKSL);
+ ordinary(p, (char)c); /* takes off BACKSL, if any */
break;
}
repeat(p, pos, count, count2);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
- NEXT1();
+ NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
start = p_b_symbol(p);
if (SEE('-') && MORE2() && PEEK2() != ']') {
/* range */
- NEXT1();
+ NEXT();
if (EAT('-'))
finish = '-';
else
register char c;
while (MORE() && isalpha(PEEK()))
- NEXT1();
+ NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
register int len;
while (MORE() && !SEETWO(endc, ']'))
- NEXT1();
+ NEXT();
if (!MORE()) {
SETERROR(REG_EBRACK);
return(0);
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register size_t css = (size_t)p->g->csetsize;
register cset *cs;
{
register uch h = cs->hash;
- register unsigned int i;
+ register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register cset *cs2;
register size_t css = (size_t)p->g->csetsize;
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register size_t css = (size_t)p->g->csetsize;
for (i = 0; i < css; i++)
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register size_t css = (size_t)p->g->csetsize;
register int n = 0;
cs->multis[cs->smultis - 1] = '\0';
}
+/*
+ - mcsub - subtract a collating element from a cset
+ == static void mcsub(register cset *cs, register char *cp);
+ */
+static void
+mcsub(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *fp = mcfind(cs, cp);
+ register size_t len = strlen(fp);
+
+ assert(fp != NULL);
+ (void) memmove(fp, fp + len + 1,
+ cs->smultis - (fp + len + 1 - cs->multis));
+ cs->smultis -= len;
+
+ if (cs->smultis == 0) {
+ free(cs->multis);
+ cs->multis = NULL;
+ return;
+ }
+
+ cs->multis = realloc(cs->multis, cs->smultis);
+ assert(cs->multis != NULL);
+}
+
+/*
+ - mcin - is a collating element in a cset?
+ == static int mcin(register cset *cs, register char *cp);
+ */
+static int
+mcin(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ return(mcfind(cs, cp) != NULL);
+}
+
+/*
+ - mcfind - find a collating element in a cset
+ == static char *mcfind(register cset *cs, register char *cp);
+ */
+static char *
+mcfind(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *p;
+
+ if (cs->multis == NULL)
+ return(NULL);
+ for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
+ if (strcmp(cp, p) == 0)
+ return(p);
+ return(NULL);
+}
/*
- mcinvert - invert the list of collating elements in a cset
static int firstch(register struct parse *p, register cset *cs);
static int nch(register struct parse *p, register cset *cs);
static void mcadd(register struct parse *p, register cset *cs, register char *cp);
+static void mcsub(register cset *cs, register char *cp);
+static int mcin(register cset *cs, register char *cp);
+static char *mcfind(register cset *cs, register char *cp);
static void mcinvert(register struct parse *p, register cset *cs);
static void mccase(register struct parse *p, register cset *cs);
static int isinsets(register struct re_guts *g, int c);
#include "regerror.ih"
/*
+ = #define REG_OKAY 0
= #define REG_NOMATCH 1
= #define REG_BADPAT 2
= #define REG_ECOLLATE 3
char *name;
char *explain;
} rerrs[] = {
- { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
- { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
- { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
- { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
- { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
- { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
- { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
- { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
- { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
- { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
- { REG_ERANGE, "REG_ERANGE", "invalid character range" },
- { REG_ESPACE, "REG_ESPACE", "out of memory" },
- { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
- { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
- { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
- { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
- { 0, "", "*** unknown regexp error code ***" }
+ REG_OKAY, "REG_OKAY", "no errors detected",
+ REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match",
+ REG_BADPAT, "REG_BADPAT", "invalid regular expression",
+ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
+ REG_ECTYPE, "REG_ECTYPE", "invalid character class",
+ REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)",
+ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
+ REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced",
+ REG_EPAREN, "REG_EPAREN", "parentheses not balanced",
+ REG_EBRACE, "REG_EBRACE", "braces not balanced",
+ REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
+ REG_ERANGE, "REG_ERANGE", "invalid character range",
+ REG_ESPACE, "REG_ESPACE", "out of memory",
+ REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid",
+ REG_EMPTY, "REG_EMPTY", "empty (sub)expression",
+ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
+ REG_INVARG, "REG_INVARG", "invalid argument to regex routine",
+ -1, "", "*** unknown regexp error code ***",
};
/*
if (errcode == REG_ATOI)
s = regatoi(preg, convbuf);
else {
- for (r = rerrs; r->code != 0; r++)
+ for (r = rerrs; r->code >= 0; r++)
if (r->code == target)
break;
if (errcode®_ITOA) {
- if (r->code != 0)
+ if (r->code >= 0)
(void) strcpy(convbuf, r->name);
else
sprintf(convbuf, "REG_0x%x", target);
{
register struct rerr *r;
- for (r = rerrs; r->code != 0; r++)
+ for (r = rerrs; r->code >= 0; r++)
if (strcmp(r->name, preg->re_endp) == 0)
break;
- if (r->code == 0)
+ if (r->code < 0)
return("0");
sprintf(localbuf, "%d", r->code);
/* === regerror.c === */
+#define REG_OKAY 0
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
-typedef unsigned long sop; /* strip operator */
+typedef long sop; /* strip operator */
typedef long sopno;
-#define OPRMASK 0xf8000000
-#define OPDMASK 0x07ffffff
-#define OPSHIFT ((unsigned)27)
+#define OPRMASK 0x7c000000
+#define OPDMASK 0x03ffffff
+#define OPSHIFT (26)
#define OP(n) ((n)&OPRMASK)
#define OPND(n) ((n)&OPDMASK)
#define SOP(op, opnd) ((op)|(opnd))
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
-#define OOR1 (16u<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
-#define OOR2 (17u<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
-#define O_CH (18u<<OPSHIFT) /* end choice back to OOR1 */
-#define OBOW (19u<<OPSHIFT) /* begin word - */
-#define OEOW (20u<<OPSHIFT) /* end word - */
+#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19<<OPSHIFT) /* begin word - */
+#define OEOW (20<<OPSHIFT) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
#include "utils.h"
#include "regex2.h"
-#ifndef NDEBUG
static int nope = 0; /* for use in asserts; shuts lint up */
-#endif
/* macros for manipulating states, small version */
-#define states long
-#define states1 states /* for later use in regexec() decision */
+#define states unsigned
+#define states1 unsigned /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
-#define SET0(v, n) ((v) &= ~(1 << (n)))
-#define SET1(v, n) ((v) |= 1 << (n))
-#define ISSET(v, n) ((v) & (1 << (n)))
+#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned)1 << (n))
+#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
#define STATEVARS int dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
-#define onestate int
+#define onestate unsigned
#define INIT(o, n) ((o) = (unsigned)1 << (n))
#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) ((v) & (o))
/* utility definitions */
-#ifndef _POSIX2_RE_DUP_MAX
-#define _POSIX2_RE_DUP_MAX 255
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
#endif
-
-#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;
char *chars;
char *multis;
} cclasses[] = {
- { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
-0123456789", "" },
- { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- "" },
- { "blank", " \t", "" },
- { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
-\25\26\27\30\31\32\33\34\35\36\37\177", "" },
- { "digit", "0123456789", "" },
- { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+0123456789", "",
+ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
+ "",
+ "blank", " \t", "",
+ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
+\25\26\27\30\31\32\33\34\35\36\37\177", "",
+ "digit", "0123456789", "",
+ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- "" },
- { "lower", "abcdefghijklmnopqrstuvwxyz",
- "" },
- { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
+ "",
+ "lower", "abcdefghijklmnopqrstuvwxyz",
+ "",
+ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
- "" },
- { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
- "" },
- { "space", "\t\n\v\f\r ", "" },
- { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
- "" },
- { "xdigit", "0123456789ABCDEFabcdef",
- "" },
- { NULL, 0, "" }
+ "",
+ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+ "",
+ "space", "\t\n\v\f\r ", "",
+ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
+ "",
+ "xdigit", "0123456789ABCDEFabcdef",
+ "",
+ NULL, 0, ""
};
char *name;
char code;
} cnames[] = {
- { "NUL", '\0' },
- { "SOH", '\001' },
- { "STX", '\002' },
- { "ETX", '\003' },
- { "EOT", '\004' },
- { "ENQ", '\005' },
- { "ACK", '\006' },
- { "BEL", '\007' },
- { "alert", '\007' },
- { "BS", '\010' },
- { "backspace", '\b' },
- { "HT", '\011' },
- { "tab", '\t' },
- { "LF", '\012' },
- { "newline", '\n' },
- { "VT", '\013' },
- { "vertical-tab", '\v' },
- { "FF", '\014' },
- { "form-feed", '\f' },
- { "CR", '\015' },
- { "carriage-return", '\r' },
- { "SO", '\016' },
- { "SI", '\017' },
- { "DLE", '\020' },
- { "DC1", '\021' },
- { "DC2", '\022' },
- { "DC3", '\023' },
- { "DC4", '\024' },
- { "NAK", '\025' },
- { "SYN", '\026' },
- { "ETB", '\027' },
- { "CAN", '\030' },
- { "EM", '\031' },
- { "SUB", '\032' },
- { "ESC", '\033' },
- { "IS4", '\034' },
- { "FS", '\034' },
- { "IS3", '\035' },
- { "GS", '\035' },
- { "IS2", '\036' },
- { "RS", '\036' },
- { "IS1", '\037' },
- { "US", '\037' },
- { "space", ' ' },
- { "exclamation-mark", '!' },
- { "quotation-mark", '"' },
- { "number-sign", '#' },
- { "dollar-sign", '$' },
- { "percent-sign", '%' },
- { "ampersand", '&' },
- { "apostrophe", '\'' },
- { "left-parenthesis", '(' },
- { "right-parenthesis", ')' },
- { "asterisk", '*' },
- { "plus-sign", '+' },
- { "comma", ',' },
- { "hyphen", '-' },
- { "hyphen-minus", '-' },
- { "period", '.' },
- { "full-stop", '.' },
- { "slash", '/' },
- { "solidus", '/' },
- { "zero", '0' },
- { "one", '1' },
- { "two", '2' },
- { "three", '3' },
- { "four", '4' },
- { "five", '5' },
- { "six", '6' },
- { "seven", '7' },
- { "eight", '8' },
- { "nine", '9' },
- { "colon", ':' },
- { "semicolon", ';' },
- { "less-than-sign", '<' },
- { "equals-sign", '=' },
- { "greater-than-sign", '>' },
- { "question-mark", '?' },
- { "commercial-at", '@' },
- { "left-square-bracket", '[' },
- { "backslash", '\\' },
- { "reverse-solidus", '\\' },
- { "right-square-bracket", ']' },
- { "circumflex", '^' },
- { "circumflex-accent", '^' },
- { "underscore", '_' },
- { "low-line", '_' },
- { "grave-accent", '`' },
- { "left-brace", '{' },
- { "left-curly-bracket", '{' },
- { "vertical-line", '|' },
- { "right-brace", '}' },
- { "right-curly-bracket", '}' },
- { "tilde", '~' },
- { "DEL", '\177' },
- { NULL, 0 }
+ "NUL", '\0',
+ "SOH", '\001',
+ "STX", '\002',
+ "ETX", '\003',
+ "EOT", '\004',
+ "ENQ", '\005',
+ "ACK", '\006',
+ "BEL", '\007',
+ "alert", '\007',
+ "BS", '\010',
+ "backspace", '\b',
+ "HT", '\011',
+ "tab", '\t',
+ "LF", '\012',
+ "newline", '\n',
+ "VT", '\013',
+ "vertical-tab", '\v',
+ "FF", '\014',
+ "form-feed", '\f',
+ "CR", '\015',
+ "carriage-return", '\r',
+ "SO", '\016',
+ "SI", '\017',
+ "DLE", '\020',
+ "DC1", '\021',
+ "DC2", '\022',
+ "DC3", '\023',
+ "DC4", '\024',
+ "NAK", '\025',
+ "SYN", '\026',
+ "ETB", '\027',
+ "CAN", '\030',
+ "EM", '\031',
+ "SUB", '\032',
+ "ESC", '\033',
+ "IS4", '\034',
+ "FS", '\034',
+ "IS3", '\035',
+ "GS", '\035',
+ "IS2", '\036',
+ "RS", '\036',
+ "IS1", '\037',
+ "US", '\037',
+ "space", ' ',
+ "exclamation-mark", '!',
+ "quotation-mark", '"',
+ "number-sign", '#',
+ "dollar-sign", '$',
+ "percent-sign", '%',
+ "ampersand", '&',
+ "apostrophe", '\'',
+ "left-parenthesis", '(',
+ "right-parenthesis", ')',
+ "asterisk", '*',
+ "plus-sign", '+',
+ "comma", ',',
+ "hyphen", '-',
+ "hyphen-minus", '-',
+ "period", '.',
+ "full-stop", '.',
+ "slash", '/',
+ "solidus", '/',
+ "zero", '0',
+ "one", '1',
+ "two", '2',
+ "three", '3',
+ "four", '4',
+ "five", '5',
+ "six", '6',
+ "seven", '7',
+ "eight", '8',
+ "nine", '9',
+ "colon", ':',
+ "semicolon", ';',
+ "less-than-sign", '<',
+ "equals-sign", '=',
+ "greater-than-sign", '>',
+ "question-mark", '?',
+ "commercial-at", '@',
+ "left-square-bracket", '[',
+ "backslash", '\\',
+ "reverse-solidus", '\\',
+ "right-square-bracket", ']',
+ "circumflex", '^',
+ "circumflex-accent", '^',
+ "underscore", '_',
+ "low-line", '_',
+ "grave-accent", '`',
+ "left-brace", '{',
+ "left-curly-bracket", '{',
+ "vertical-line", '|',
+ "right-brace", '}',
+ "right-curly-bracket", '}',
+ "tilde", '~',
+ "DEL", '\177',
+ NULL, 0,
};
int eflags;
{
register char *endp;
- register unsigned int i;
+ register int i;
struct match mv;
register struct match *m = &mv;
register char *dp;
- register const sopno gf = g->firststate+1; /* +1 for OEND */
- register const sopno gl = g->laststate;
+ const register sopno gf = g->firststate+1; /* +1 for OEND */
+ const register sopno gl = g->laststate;
char *start;
char *stop;
/* "can't happen" */
assert(nope);
/* NOTREACHED */
- return( NULL );
+ return((char *)NULL); /* dummy */
}
/*
register sopno pc;
register onestate here; /* note, macros know this name */
register sopno look;
- register int i;
+ register long i;
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
#define MORE2() (p->next+1 < p->end)
#define SEE(c) (MORE() && PEEK() == (c))
#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
-#define EAT(c) ((SEE(c)) ? (NEXT1(), 1) : 0)
+#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
-#define NEXT1() (p->next++)
+#define NEXT() (p->next++)
#define NEXT2() (p->next += 2)
#define NEXTn(n) (p->next += (n))
#define GETNEXT() (*p->next++)
#define SETERROR(e) seterr(p, (e))
-#define REQUIRE(co, e) ((void)((co) || SETERROR(e)))
+#define REQUIRE(co, e) ((co) || SETERROR(e))
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
if (!( c == '*' || c == '+' || c == '?' ||
(c == '{' && MORE2() && isdigit(PEEK2())) ))
return; /* no repetition, we're done */
- NEXT1();
+ NEXT();
REQUIRE(!wascaret, REG_BADRPT);
switch (c) {
repeat(p, pos, count, count2);
if (!EAT('}')) { /* error heuristics */
while (MORE() && PEEK() != '}')
- NEXT1();
+ NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
REQUIRE(starordinary, REG_BADRPT);
/* FALLTHROUGH */
default:
- ordinary(p, c &~ BACKSL);
+ ordinary(p, (char)c); /* takes off BACKSL, if any */
break;
}
repeat(p, pos, count, count2);
if (!EATTWO('\\', '}')) { /* error heuristics */
while (MORE() && !SEETWO('\\', '}'))
- NEXT1();
+ NEXT();
REQUIRE(MORE(), REG_EBRACE);
SETERROR(REG_BADBR);
}
start = p_b_symbol(p);
if (SEE('-') && MORE2() && PEEK2() != ']') {
/* range */
- NEXT1();
+ NEXT();
if (EAT('-'))
finish = '-';
else
register char c;
while (MORE() && isalpha(PEEK()))
- NEXT1();
+ NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
register int len;
while (MORE() && !SEETWO(endc, ']'))
- NEXT1();
+ NEXT();
if (!MORE()) {
SETERROR(REG_EBRACK);
return(0);
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register size_t css = (size_t)p->g->csetsize;
register cset *cs;
{
register uch h = cs->hash;
- register unsigned int i;
+ register int i;
register cset *top = &p->g->sets[p->g->ncsets];
register cset *cs2;
register size_t css = (size_t)p->g->csetsize;
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register size_t css = (size_t)p->g->csetsize;
for (i = 0; i < css; i++)
register struct parse *p;
register cset *cs;
{
- register unsigned int i;
+ register int i;
register size_t css = (size_t)p->g->csetsize;
register int n = 0;
cs->multis[cs->smultis - 1] = '\0';
}
+/*
+ - mcsub - subtract a collating element from a cset
+ == static void mcsub(register cset *cs, register char *cp);
+ */
+static void
+mcsub(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *fp = mcfind(cs, cp);
+ register size_t len = strlen(fp);
+
+ assert(fp != NULL);
+ (void) memmove(fp, fp + len + 1,
+ cs->smultis - (fp + len + 1 - cs->multis));
+ cs->smultis -= len;
+
+ if (cs->smultis == 0) {
+ free(cs->multis);
+ cs->multis = NULL;
+ return;
+ }
+
+ cs->multis = realloc(cs->multis, cs->smultis);
+ assert(cs->multis != NULL);
+}
+
+/*
+ - mcin - is a collating element in a cset?
+ == static int mcin(register cset *cs, register char *cp);
+ */
+static int
+mcin(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ return(mcfind(cs, cp) != NULL);
+}
+
+/*
+ - mcfind - find a collating element in a cset
+ == static char *mcfind(register cset *cs, register char *cp);
+ */
+static char *
+mcfind(cs, cp)
+register cset *cs;
+register char *cp;
+{
+ register char *p;
+
+ if (cs->multis == NULL)
+ return(NULL);
+ for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
+ if (strcmp(cp, p) == 0)
+ return(p);
+ return(NULL);
+}
/*
- mcinvert - invert the list of collating elements in a cset
static int firstch(register struct parse *p, register cset *cs);
static int nch(register struct parse *p, register cset *cs);
static void mcadd(register struct parse *p, register cset *cs, register char *cp);
+static void mcsub(register cset *cs, register char *cp);
+static int mcin(register cset *cs, register char *cp);
+static char *mcfind(register cset *cs, register char *cp);
static void mcinvert(register struct parse *p, register cset *cs);
static void mccase(register struct parse *p, register cset *cs);
static int isinsets(register struct re_guts *g, int c);
#include "regerror.ih"
/*
+ = #define REG_OKAY 0
= #define REG_NOMATCH 1
= #define REG_BADPAT 2
= #define REG_ECOLLATE 3
char *name;
char *explain;
} rerrs[] = {
- { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" },
- { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
- { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
- { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
- { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
- { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
- { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
- { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
- { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
- { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
- { REG_ERANGE, "REG_ERANGE", "invalid character range" },
- { REG_ESPACE, "REG_ESPACE", "out of memory" },
- { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
- { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
- { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
- { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
- { 0, "", "*** unknown regexp error code ***" }
+ REG_OKAY, "REG_OKAY", "no errors detected",
+ REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match",
+ REG_BADPAT, "REG_BADPAT", "invalid regular expression",
+ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
+ REG_ECTYPE, "REG_ECTYPE", "invalid character class",
+ REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)",
+ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
+ REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced",
+ REG_EPAREN, "REG_EPAREN", "parentheses not balanced",
+ REG_EBRACE, "REG_EBRACE", "braces not balanced",
+ REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
+ REG_ERANGE, "REG_ERANGE", "invalid character range",
+ REG_ESPACE, "REG_ESPACE", "out of memory",
+ REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid",
+ REG_EMPTY, "REG_EMPTY", "empty (sub)expression",
+ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
+ REG_INVARG, "REG_INVARG", "invalid argument to regex routine",
+ -1, "", "*** unknown regexp error code ***",
};
/*
if (errcode == REG_ATOI)
s = regatoi(preg, convbuf);
else {
- for (r = rerrs; r->code != 0; r++)
+ for (r = rerrs; r->code >= 0; r++)
if (r->code == target)
break;
if (errcode®_ITOA) {
- if (r->code != 0)
+ if (r->code >= 0)
(void) strcpy(convbuf, r->name);
else
sprintf(convbuf, "REG_0x%x", target);
{
register struct rerr *r;
- for (r = rerrs; r->code != 0; r++)
+ for (r = rerrs; r->code >= 0; r++)
if (strcmp(r->name, preg->re_endp) == 0)
break;
- if (r->code == 0)
+ if (r->code < 0)
return("0");
sprintf(localbuf, "%d", r->code);
/* === regerror.c === */
+#define REG_OKAY 0
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
-typedef unsigned long sop; /* strip operator */
+typedef long sop; /* strip operator */
typedef long sopno;
-#define OPRMASK 0xf8000000
-#define OPDMASK 0x07ffffff
-#define OPSHIFT ((unsigned)27)
+#define OPRMASK 0x7c000000
+#define OPDMASK 0x03ffffff
+#define OPSHIFT (26)
#define OP(n) ((n)&OPRMASK)
#define OPND(n) ((n)&OPDMASK)
#define SOP(op, opnd) ((op)|(opnd))
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
-#define OOR1 (16u<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
-#define OOR2 (17u<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
-#define O_CH (18u<<OPSHIFT) /* end choice back to OOR1 */
-#define OBOW (19u<<OPSHIFT) /* begin word - */
-#define OEOW (20u<<OPSHIFT) /* end word - */
+#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19<<OPSHIFT) /* begin word - */
+#define OEOW (20<<OPSHIFT) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
#include "utils.h"
#include "regex2.h"
-#ifndef NDEBUG
static int nope = 0; /* for use in asserts; shuts lint up */
-#endif
/* macros for manipulating states, small version */
-#define states long
-#define states1 states /* for later use in regexec() decision */
+#define states unsigned
+#define states1 unsigned /* for later use in regexec() decision */
#define CLEAR(v) ((v) = 0)
-#define SET0(v, n) ((v) &= ~(1 << (n)))
-#define SET1(v, n) ((v) |= 1 << (n))
-#define ISSET(v, n) ((v) & (1 << (n)))
+#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned)1 << (n))
+#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
#define ASSIGN(d, s) ((d) = (s))
#define EQ(a, b) ((a) == (b))
#define STATEVARS int dummy /* dummy version */
#define STATESETUP(m, n) /* nothing */
#define STATETEARDOWN(m) /* nothing */
#define SETUP(v) ((v) = 0)
-#define onestate int
+#define onestate unsigned
#define INIT(o, n) ((o) = (unsigned)1 << (n))
#define INC(o) ((o) <<= 1)
#define ISSTATEIN(v, o) ((v) & (o))
/* utility definitions */
-#ifndef _POSIX2_RE_DUP_MAX
-#define _POSIX2_RE_DUP_MAX 255
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
#endif
-
-#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */
#define INFINITY (DUPMAX + 1)
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;