From: Tom Lane Date: Tue, 13 Feb 2001 00:02:36 +0000 (+0000) Subject: Clean up portability problems in regexp package: change all routine X-Git-Tag: REL7_1~435 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f7a839bc2ba3f15d48006fe931499d4d9cfb314f;p=postgresql Clean up portability problems in regexp package: change all routine definitions from K&R to ANSI C style, and fix broken assumption that int and long are the same datatype. This repairs problems observed on Alpha with regexps having between 32 and 63 states. --- diff --git a/src/backend/regex/engine.c b/src/backend/regex/engine.c index 58f776a560..17aa3c8a49 100644 --- a/src/backend/regex/engine.c +++ b/src/backend/regex/engine.c @@ -80,37 +80,26 @@ struct match pg_wchar *endp; /* end of string -- virtual NUL here */ pg_wchar *coldp; /* can be no match starting before here */ pg_wchar **lastpos; /* [nplus+1] */ - STATEVARS; + STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ states tmp; /* temporary */ states empty; /* empty set of states */ }; -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" -{ -#endif +static int matcher(struct re_guts *g, pg_wchar *string, size_t nmatch, + regmatch_t *pmatch, int eflags); +static pg_wchar *dissect(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst); +static pg_wchar *backref(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst, sopno lev); +static pg_wchar *fast(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst); +static pg_wchar *slow(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst); +static states step(struct re_guts *g, sopno start, + sopno stop, states bef, int ch, states aft); -/* === engine.c === */ - static int - matcher(struct re_guts * g, pg_wchar * string, size_t nmatch, - regmatch_t *pmatch, int eflags); - static pg_wchar * - dissect(struct match * m, pg_wchar * start, pg_wchar * stop, - sopno startst, sopno stopst); - static pg_wchar * - backref(struct match * m, pg_wchar * start, pg_wchar * stop, - sopno startst, sopno stopst, sopno lev); - static pg_wchar * - fast(struct match * m, pg_wchar * start, pg_wchar * stop, - sopno startst, sopno stopst); - static pg_wchar * - slow(struct match * m, pg_wchar * start, pg_wchar * stop, sopno startst, sopno stopst); - static states - step(struct re_guts * g, sopno start, - sopno stop, states bef, int ch, states aft); #define BOL (OUT+1) #define EOL (BOL+1) #define BOLEOL (BOL+2) @@ -128,24 +117,13 @@ extern "C" #endif #ifdef REDEBUG - static void - print(struct match * m, pg_wchar * caption, states st, int ch, FILE *d); -#endif -#ifdef REDEBUG - static void - at(struct match * m, pg_wchar * title, pg_wchar * start, pg_wchar * stop, - sopno startst, sopno stopst); -#endif -#ifdef REDEBUG - static pg_wchar * - p_char(int ch); -#endif - -#ifdef __cplusplus -} - +static void print(struct match *m, pg_wchar *caption, states st, int ch, + FILE *d); +static void at(struct match *m, pg_wchar *title, pg_wchar *start, + pg_wchar *stop, sopno startst, sopno stopst); +static pg_wchar *pchar(int ch); +static int pg_isprint(int c); #endif -/* ========= end header generated by ./mkh ========= */ #ifdef REDEBUG #define SP(t, s, c) print(m, t, s, c, stdout) @@ -158,17 +136,11 @@ extern "C" #endif /* - - matcher - the actual matching engine - == static int matcher(struct re_guts *g, pg_wchar *string, \ - == size_t nmatch, regmatch_t *pmatch, int eflags); + * matcher - the actual matching engine */ static int /* 0 success, REG_NOMATCH failure */ -matcher(g, string, nmatch, pmatch, eflags) -struct re_guts *g; -pg_wchar *string; -size_t nmatch; -regmatch_t *pmatch; -int eflags; +matcher(struct re_guts *g, pg_wchar *string, size_t nmatch, + regmatch_t *pmatch, int eflags) { pg_wchar *endp; int i; @@ -206,10 +178,11 @@ int eflags; for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && #ifdef MULTIBYTE - memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0) + memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0 #else - memcmp(dp, g->must, (size_t) g->mlen) == 0) + memcmp(dp, g->must, (size_t) g->mlen) == 0 #endif + ) break; if (dp == stop) /* we didn't find g->must */ return REG_NOMATCH; @@ -349,17 +322,11 @@ int eflags; } /* - - dissect - figure out what matched what, no back references - == static char *dissect(struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + * dissect - figure out what matched what, no back references */ static pg_wchar * /* == stop (success) always */ -dissect(m, start, stop, startst, stopst) -struct match *m; -pg_wchar *start; -pg_wchar *stop; -sopno startst; -sopno stopst; +dissect(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst) { int i; sopno ss; /* start sop of current subRE */ @@ -549,18 +516,13 @@ sopno stopst; } /* - - backref - figure out what matched what, figuring in back references - == static char *backref(struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst, sopno lev); + * backref - figure out what matched what, figuring in back references + * + * lev is PLUS nesting level */ static pg_wchar * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) -struct match *m; -pg_wchar *start; -pg_wchar *stop; -sopno startst; -sopno stopst; -sopno lev; /* PLUS nesting level */ +backref(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst, sopno lev) { int i; sopno ss; /* start sop of current subRE */ @@ -763,17 +725,11 @@ sopno lev; /* PLUS nesting level */ } /* - - fast - step through the string at top speed - == static char *fast(struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + * fast - step through the string at top speed */ static pg_wchar * /* where tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) -struct match *m; -pg_wchar *start; -pg_wchar *stop; -sopno startst; -sopno stopst; +fast(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst) { states st = m->st; states fresh = m->fresh; @@ -858,17 +814,11 @@ sopno stopst; } /* - - slow - step through the string more deliberately - == static char *slow(struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + * slow - step through the string more deliberately */ static pg_wchar * /* where it ended */ -slow(m, start, stop, startst, stopst) -struct match *m; -pg_wchar *start; -pg_wchar *stop; -sopno startst; -sopno stopst; +slow(struct match *m, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst) { states st = m->st; states empty = m->empty; @@ -948,27 +898,15 @@ sopno stopst; /* - - step - map set of states reachable before char to set reachable after - == static states step(struct re_guts *g, sopno start, sopno stop, \ - == states bef, int ch, states aft); - == #define BOL (OUT+1) - == #define EOL (BOL+1) - == #define BOLEOL (BOL+2) - == #define NOTHING (BOL+3) - == #define BOW (BOL+4) - == #define EOW (BOL+5) - == #define CODEMAX (BOL+5) // highest code used - == #define NONCHAR(c) ((c) > CHAR_MAX) - == #define NNONCHAR (CODEMAX-CHAR_MAX) + * step - map set of states reachable before char to set reachable after */ static states -step(g, start, stop, bef, ch, aft) -struct re_guts *g; -sopno start; /* start state within strip */ -sopno stop; /* state after stop state within strip */ -states bef; /* states reachable before */ -int ch; /* character or NONCHAR code */ -states aft; /* states already known reachable after */ +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + int ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ { cset *cs; sop s; @@ -1082,19 +1020,11 @@ states aft; /* states already known reachable after */ #ifdef REDEBUG /* - - print - print a set of states - == #ifdef REDEBUG - == static void print(struct match *m, char *caption, states st, \ - == int ch, FILE *d); - == #endif + * print - print a set of states */ static void -print(m, caption, st, ch, d) -struct match *m; -pg_wchar *caption; -states st; -int ch; -FILE *d; +print(struct match *m, pg_wchar *caption, states st, + int ch, FILE *d) { struct re_guts *g = m->g; int i; @@ -1116,20 +1046,11 @@ FILE *d; } /* - - at - print current situation - == #ifdef REDEBUG - == static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \ - == sopno startst, sopno stopst); - == #endif + * at - print current situation */ static void -at(m, title, start, stop, startst, stopst) -struct match *m; -pg_wchar *title; -pg_wchar *start; -pg_wchar *stop; -sopno startst; -sopno stopst; +at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, + sopno startst, sopno stopst) { if (!(m->eflags & REG_TRACE)) return; @@ -1140,19 +1061,26 @@ sopno stopst; } #ifndef PCHARDONE -#define PCHARDONE /* never again */ +#define PCHARDONE /* only do this once */ /* - - pchar - make a character printable - == #ifdef REDEBUG - == static char *pchar(int ch); - == #endif + * pchar - make a character printable * * Is this identical to regchar() over in debug.c? Well, yes. But a * duplicate here avoids having a debugging-capable regexec.o tied to * a matching debug.o, and this is convenient. It all disappears in * the non-debug compilation anyway, so it doesn't matter much. */ +static pg_wchar * /* -> representation */ +pchar(int ch) +{ + static pg_wchar pbuf[10]; + if (pg_isprint(ch) || ch == ' ') + sprintf(pbuf, "%c", ch); + else + sprintf(pbuf, "\\%o", ch); + return pbuf; +} static int pg_isprint(int c) @@ -1164,19 +1092,6 @@ pg_isprint(int c) #endif } -static pg_wchar * /* -> representation */ -pchar(ch) -int ch; -{ - static pg_wchar pbuf[10]; - - if (pg_isprint(ch) || ch == ' ') - sprintf(pbuf, "%c", ch); - else - sprintf(pbuf, "\\%o", ch); - return pbuf; -} - #endif #endif diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 3d9ff83de8..b45a3c5237 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -37,19 +37,11 @@ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; - -#endif /* LIBC_SCCS and not lint */ - #include "postgres.h" #include -#include -#include #include #include -#include #include #include "regex/regex.h" @@ -78,64 +70,51 @@ struct parse sopno pend[NPAREN]; /* -> ) ([0] unused) */ }; -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" -{ -#endif - -/* === regcomp.c === */ - static void p_ere(struct parse * p, int stop); - static void p_ere_exp(struct parse * p); - static void p_str(struct parse * p); - static void p_bre(struct parse * p, int end1, int end2); - static int p_simp_re(struct parse * p, int starordinary); - static int p_count(struct parse * p); - static void p_bracket(struct parse * p); - static void p_b_term(struct parse * p, cset *cs); - static void p_b_cclass(struct parse * p, cset *cs); - static void p_b_eclass(struct parse * p, cset *cs); - static pg_wchar p_b_symbol(struct parse * p); - static char p_b_coll_elem(struct parse * p, int endc); +static void p_ere(struct parse * p, int stop); +static void p_ere_exp(struct parse * p); +static void p_str(struct parse * p); +static void p_bre(struct parse * p, int end1, int end2); +static int p_simp_re(struct parse * p, int starordinary); +static int p_count(struct parse * p); +static void p_bracket(struct parse * p); +static void p_b_term(struct parse * p, cset *cs); +static void p_b_cclass(struct parse * p, cset *cs); +static void p_b_eclass(struct parse * p, cset *cs); +static pg_wchar p_b_symbol(struct parse * p); +static char p_b_coll_elem(struct parse * p, int endc); #ifdef MULTIBYTE - static unsigned char othercase(int ch); +static unsigned char othercase(int ch); #else - static char othercase(int ch); -#endif - static void bothcases(struct parse * p, int ch); - static void ordinary(struct parse * p, int ch); - static void nonnewline(struct parse * p); - static void repeat(struct parse * p, sopno start, int from, int to); - static int seterr(struct parse * p, int e); - static cset *allocset(struct parse * p); - static void freeset(struct parse * p, cset *cs); - static int freezeset(struct parse * p, cset *cs); - static int firstch(struct parse * p, cset *cs); - static int nch(struct parse * p, cset *cs); - static void mcadd(struct parse * p, cset *cs, char *cp); - static void mcinvert(struct parse * p, cset *cs); - static void mccase(struct parse * p, cset *cs); - static int isinsets(struct re_guts * g, int c); - static int samesets(struct re_guts * g, int c1, int c2); - static void categorize(struct parse * p, struct re_guts * g); - static sopno dupl(struct parse * p, sopno start, sopno finish); - static void doemit(struct parse * p, sop op, size_t opnd); - static void doinsert(struct parse * p, sop op, size_t opnd, sopno pos); - static void dofwd(struct parse * p, sopno pos, sop value); - static void enlarge(struct parse * p, sopno size); - static void stripsnug(struct parse * p, struct re_guts * g); - static void findmust(struct parse * p, struct re_guts * g); - static sopno pluscount(struct parse * p, struct re_guts * g); - static int pg_isdigit(int c); - static int pg_isalpha(int c); - static int pg_isupper(int c); - static int pg_islower(int c); - -#ifdef __cplusplus -} - +static char othercase(int ch); #endif -/* ========= end header generated by ./mkh ========= */ +static void bothcases(struct parse * p, int ch); +static void ordinary(struct parse * p, int ch); +static void nonnewline(struct parse * p); +static void repeat(struct parse * p, sopno start, int from, int to); +static int seterr(struct parse * p, int e); +static cset *allocset(struct parse * p); +static void freeset(struct parse * p, cset *cs); +static int freezeset(struct parse * p, cset *cs); +static int firstch(struct parse * p, cset *cs); +static int nch(struct parse * p, cset *cs); +static void mcadd(struct parse * p, cset *cs, char *cp); +static void mcinvert(struct parse * p, cset *cs); +static void mccase(struct parse * p, cset *cs); +static int isinsets(struct re_guts * g, int c); +static int samesets(struct re_guts * g, int c1, int c2); +static void categorize(struct parse * p, struct re_guts * g); +static sopno dupl(struct parse * p, sopno start, sopno finish); +static void doemit(struct parse * p, sop op, size_t opnd); +static void doinsert(struct parse * p, sop op, size_t opnd, sopno pos); +static void dofwd(struct parse * p, sopno pos, sop value); +static void enlarge(struct parse * p, sopno size); +static void stripsnug(struct parse * p, struct re_guts * g); +static void findmust(struct parse * p, struct re_guts * g); +static sopno pluscount(struct parse * p, struct re_guts * g); +static int pg_isdigit(int c); +static int pg_isalpha(int c); +static int pg_isupper(int c); +static int pg_islower(int c); static pg_wchar nuls[10]; /* place to point scanner in event of * error */ @@ -178,22 +157,10 @@ static int never = 0; /* for use in asserts; shuts lint up */ #endif /* - - regcomp - interface for parser and compilation - = extern int regcomp(regex_t *, const char *, int); - = #define REG_BASIC 0000 - = #define REG_EXTENDED 0001 - = #define REG_ICASE 0002 - = #define REG_NOSUB 0004 - = #define REG_NEWLINE 0010 - = #define REG_NOSPEC 0020 - = #define REG_PEND 0040 - = #define REG_DUMP 0200 + * regcomp - interface for parser and compilation */ int /* 0 success, otherwise REG_something */ -pg95_regcomp(preg, pattern, cflags) -regex_t *preg; -const char *pattern; -int cflags; +pg95_regcomp(regex_t *preg, const char *pattern, int cflags) { struct parse pa; struct re_guts *g; @@ -325,13 +292,11 @@ int cflags; } /* - - p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(struct parse *p, int stop); + * p_ere - ERE parser top level, concatenation and alternation */ static void -p_ere(p, stop) -struct parse *p; -int stop; /* character this ERE should end at */ +p_ere(struct parse *p, + int stop) /* character this ERE should end at */ { char c; sopno prevback = 0; @@ -374,12 +339,10 @@ int stop; /* character this ERE should end at */ } /* - - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(struct parse *p); + * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op */ static void -p_ere_exp(p) -struct parse *p; +p_ere_exp(struct parse *p) { pg_wchar c; sopno pos; @@ -535,12 +498,10 @@ struct parse *p; } /* - - p_str - string (no metacharacters) "parser" - == static void p_str(struct parse *p); + * p_str - string (no metacharacters) "parser" */ static void -p_str(p) -struct parse *p; +p_str(struct parse *p) { REQUIRE(MORE(), REG_EMPTY); while (MORE()) @@ -548,9 +509,8 @@ struct parse *p; } /* - - p_bre - BRE parser top level, anchoring and concatenation - == static void p_bre(struct parse *p, int end1, \ - == int end2); + * p_bre - BRE parser top level, anchoring and concatenation + * * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -560,10 +520,9 @@ struct parse *p; * The amount of lookahead needed to avoid this kludge is excessive. */ static void -p_bre(p, end1, end2) -struct parse *p; -int end1; /* first terminating character */ -int end2; /* second terminating character */ +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ { sopno start = HERE(); int first = 1; /* first subexpression? */ @@ -592,13 +551,11 @@ int end2; /* second terminating character */ } /* - - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(struct parse *p, int starordinary); + * p_simp_re - parse a simple RE, an atom possibly followed by a repetition */ static int /* was the simple RE an unbackslashed $? */ -p_simp_re(p, starordinary) -struct parse *p; -int starordinary; /* is a leading * an ordinary character? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? */ { int c; int count; @@ -731,12 +688,10 @@ int starordinary; /* is a leading * an ordinary character? */ } /* - - p_count - parse a repetition count - == static int p_count(struct parse *p); + * p_count - parse a repetition count */ static int /* the value */ -p_count(p) -struct parse *p; +p_count(struct parse *p) { int count = 0; int ndigits = 0; @@ -752,15 +707,13 @@ struct parse *p; } /* - - p_bracket - parse a bracketed character list - == static void p_bracket(struct parse *p); + * p_bracket - parse a bracketed character list * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. */ static void -p_bracket(p) -struct parse *p; +p_bracket(struct parse *p) { cset *cs = allocset(p); int invert = 0; @@ -850,13 +803,10 @@ struct parse *p; } /* - - p_b_term - parse one term of a bracketed character list - == static void p_b_term(struct parse *p, cset *cs); + * p_b_term - parse one term of a bracketed character list */ static void -p_b_term(p, cs) -struct parse *p; -cset *cs; +p_b_term(struct parse *p, cset *cs) { pg_wchar c; pg_wchar start, @@ -925,13 +875,10 @@ cset *cs; } /* - - p_b_cclass - parse a character-class name and deal with it - == static void p_b_cclass(struct parse *p, cset *cs); + * p_b_cclass - parse a character-class name and deal with it */ static void -p_b_cclass(p, cs) -struct parse *p; -cset *cs; +p_b_cclass(struct parse *p, cset *cs) { pg_wchar *sp = p->next; struct cclass *cp; @@ -964,15 +911,12 @@ cset *cs; } /* - - p_b_eclass - parse an equivalence-class name and deal with it - == static void p_b_eclass(struct parse *p, cset *cs); + * p_b_eclass - parse an equivalence-class name and deal with it * * This implementation is incomplete. xxx */ static void -p_b_eclass(p, cs) -struct parse *p; -cset *cs; +p_b_eclass(struct parse *p, cset *cs) { char c; @@ -981,12 +925,10 @@ cset *cs; } /* - - p_b_symbol - parse a character or [..]ed multicharacter collating symbol - == static char p_b_symbol(struct parse *p); + * p_b_symbol - parse a character or [..]ed multicharacter collating symbol */ static pg_wchar /* value of symbol */ -p_b_symbol(p) -struct parse *p; +p_b_symbol(struct parse *p) { pg_wchar value; @@ -1001,13 +943,10 @@ struct parse *p; } /* - - p_b_coll_elem - parse a collating-element name and look it up - == static char p_b_coll_elem(struct parse *p, int endc); + * p_b_coll_elem - parse a collating-element name and look it up */ static char /* value of collating element */ -p_b_coll_elem(p, endc) -struct parse *p; -int endc; /* name ended by endc,']' */ +p_b_coll_elem(struct parse *p, int endc) { pg_wchar *sp = p->next; struct cname *cp; @@ -1035,16 +974,14 @@ int endc; /* name ended by endc,']' */ } /* - - othercase - return the case counterpart of an alphabetic - == static char othercase(int ch); + * othercase - return the case counterpart of an alphabetic */ #ifdef MULTIBYTE static unsigned char /* if no counterpart, return ch */ #else static char /* if no counterpart, return ch */ #endif -othercase(ch) -int ch; +othercase(int ch) { assert(pg_isalpha(ch)); if (pg_isupper(ch)) @@ -1069,15 +1006,12 @@ int ch; } /* - - bothcases - emit a dualcase version of a two-case character - == static void bothcases(struct parse *p, int ch); + * bothcases - emit a dualcase version of a two-case character * * Boy, is this implementation ever a kludge... */ static void -bothcases(p, ch) -struct parse *p; -int ch; +bothcases(struct parse *p, int ch) { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; @@ -1096,13 +1030,10 @@ int ch; } /* - - ordinary - emit an ordinary character - == static void ordinary(struct parse *p, int ch); + * ordinary - emit an ordinary character */ static void -ordinary(p, ch) -struct parse *p; -int ch; +ordinary(struct parse *p, int ch) { cat_t *cap = p->g->categories; @@ -1121,14 +1052,12 @@ int ch; } /* - - nonnewline - emit REG_NEWLINE version of OANY - == static void nonnewline(struct parse *p); + * nonnewline - emit REG_NEWLINE version of OANY * * Boy, is this implementation ever a kludge... */ static void -nonnewline(p) -struct parse *p; +nonnewline(struct parse *p) { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; @@ -1147,15 +1076,13 @@ struct parse *p; } /* - - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(struct parse *p, sopno start, int from, int to); + * repeat - generate code for a bounded repetition, recursively if needed */ static void -repeat(p, start, from, to) -struct parse *p; -sopno start; /* operand from here to end of strip */ -int from; /* repeated from this number */ -int to; /* to this number of times (maybe +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe * INFINITY) */ { sopno finish = HERE(); @@ -1222,13 +1149,10 @@ int to; /* to this number of times (maybe } /* - - seterr - set an error condition - == static int seterr(struct parse *p, int e); + * seterr - set an error condition */ static int /* useless but makes type checking happy */ -seterr(p, e) -struct parse *p; -int e; +seterr(struct parse *p, int e) { if (p->error == 0) /* keep earliest error condition */ p->error = e; @@ -1238,12 +1162,10 @@ int e; } /* - - allocset - allocate a set of characters for [] - == static cset *allocset(struct parse *p); + * allocset - allocate a set of characters for [] */ static cset * -allocset(p) -struct parse *p; +allocset(struct parse *p) { int no = p->g->ncsets++; size_t nc; @@ -1296,13 +1218,10 @@ struct parse *p; } /* - - freeset - free a now-unused set - == static void freeset(struct parse *p, cset *cs); + * freeset - free a now-unused set */ static void -freeset(p, cs) -struct parse *p; -cset *cs; +freeset(struct parse *p, cset *cs) { int i; cset *top = &p->g->sets[p->g->ncsets]; @@ -1315,8 +1234,7 @@ cset *cs; } /* - - freezeset - final processing on a set of characters - == static int freezeset(struct parse *p, cset *cs); + * freezeset - final processing on a set of characters * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win @@ -1325,9 +1243,7 @@ cset *cs; * the same value! */ static int /* set number */ -freezeset(p, cs) -struct parse *p; -cset *cs; +freezeset(struct parse *p, cset *cs) { uch h = cs->hash; int i; @@ -1357,13 +1273,10 @@ cset *cs; } /* - - firstch - return first character in a set (which must have at least one) - == static int firstch(struct parse *p, cset *cs); + * firstch - return first character in a set (which must have at least one) */ static int /* character; there is no "none" value */ -firstch(p, cs) -struct parse *p; -cset *cs; +firstch(struct parse *p, cset *cs) { int i; size_t css = (size_t) p->g->csetsize; @@ -1376,13 +1289,10 @@ cset *cs; } /* - - nch - number of characters in a set - == static int nch(struct parse *p, cset *cs); + * nch - number of characters in a set */ static int -nch(p, cs) -struct parse *p; -cset *cs; +nch(struct parse *p, cset *cs) { int i; size_t css = (size_t) p->g->csetsize; @@ -1395,15 +1305,10 @@ cset *cs; } /* - - mcadd - add a collating element to a cset - == static void mcadd(struct parse *p, cset *cs, \ - == char *cp); + * mcadd - add a collating element to a cset */ static void -mcadd(p, cs, cp) -struct parse *p; -cset *cs; -char *cp; +mcadd(struct parse *p, cset *cs, char *cp) { size_t oldend = cs->smultis; @@ -1423,106 +1328,34 @@ char *cp; } /* - - mcsub - subtract a collating element from a cset - == static void mcsub(cset *cs, char *cp); - */ -/* -static void -mcsub(cs, cp) -cset *cs; -char *cp; -{ - char *fp = mcfind(cs, cp); - size_t len = strlen(fp); - - assert(fp != NULL); - memmove(fp, fp + len + 1, - cs->smultis - (fp + len + 1 - cs->multis)); - cs->smultis -= len; - - if (cs->smultis == 0) { - free(cs->multis); - cs->multis = NULL; - return; - } - - cs->multis = realloc(cs->multis, cs->smultis); - assert(cs->multis != NULL); -} -*/ - -/* - - mcin - is a collating element in a cset? - == static int mcin(cset *cs, char *cp); - */ -/* -static int -mcin(cs, cp) -cset *cs; -char *cp; -{ - return(mcfind(cs, cp) != NULL); -} -*/ - -/* - - mcfind - find a collating element in a cset - == static char *mcfind(cset *cs, char *cp); - */ -/* -static char * -mcfind(cs, cp) -cset *cs; -char *cp; -{ - char *p; - - if (cs->multis == NULL) - return(NULL); - for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) - if (strcmp(cp, p) == 0) - return(p); - return(NULL); -} -*/ -/* - - mcinvert - invert the list of collating elements in a cset - == static void mcinvert(struct parse *p, cset *cs); + * mcinvert - invert the list of collating elements in a cset * * This would have to know the set of possibilities. Implementation * is deferred. */ static void -mcinvert(p, cs) -struct parse *p; -cset *cs; +mcinvert(struct parse *p, cset *cs) { assert(cs->multis == NULL); /* xxx */ } /* - - mccase - add case counterparts of the list of collating elements in a cset - == static void mccase(struct parse *p, cset *cs); + * mccase - add case counterparts of the list of collating elements in a cset * * This would have to know the set of possibilities. Implementation * is deferred. */ static void -mccase(p, cs) -struct parse *p; -cset *cs; +mccase(struct parse *p, cset *cs) { assert(cs->multis == NULL); /* xxx */ } /* - - isinsets - is this character in any sets? - == static int isinsets(struct re_guts *g, int c); + * isinsets - is this character in any sets? */ static int /* predicate */ -isinsets(g, c) -struct re_guts *g; -int c; +isinsets(struct re_guts *g, int c) { uch *col; int i; @@ -1536,14 +1369,10 @@ int c; } /* - - samesets - are these two characters in exactly the same sets? - == static int samesets(struct re_guts *g, int c1, int c2); + * samesets - are these two characters in exactly the same sets? */ static int /* predicate */ -samesets(g, c1, c2) -struct re_guts *g; -int c1; -int c2; +samesets(struct re_guts *g, int c1, int c2) { uch *col; int i; @@ -1558,13 +1387,10 @@ int c2; } /* - - categorize - sort out character categories - == static void categorize(struct parse *p, struct re_guts *g); + * categorize - sort out character categories */ static void -categorize(p, g) -struct parse *p; -struct re_guts *g; +categorize(struct parse *p, struct re_guts *g) { cat_t *cats = g->categories; int c; @@ -1587,14 +1413,12 @@ struct re_guts *g; } /* - - dupl - emit a duplicate of a bunch of sops - == static sopno dupl(struct parse *p, sopno start, sopno finish); + * dupl - emit a duplicate of a bunch of sops */ static sopno /* start of duplicate */ -dupl(p, start, finish) -struct parse *p; -sopno start; /* from here */ -sopno finish; /* to this less one */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ { sopno ret = HERE(); sopno len = finish - start; @@ -1611,18 +1435,14 @@ sopno finish; /* to this less one */ } /* - - doemit - emit a strip operator - == static void doemit(struct parse *p, sop op, size_t opnd); + * doemit - emit a strip operator * * It might seem better to implement this as a macro with a function as * hard-case backup, but it's just too big and messy unless there are * some changes to the data structures. Maybe later. */ static void -doemit(p, op, opnd) -struct parse *p; -sop op; -size_t opnd; +doemit(struct parse *p, sop op, size_t opnd) { /* avoid making error situations worse */ if (p->error != 0) @@ -1641,15 +1461,10 @@ size_t opnd; } /* - - doinsert - insert a sop into the strip - == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); + * doinsert - insert a sop into the strip */ static void -doinsert(p, op, opnd, pos) -struct parse *p; -sop op; -size_t opnd; -sopno pos; +doinsert(struct parse *p, sop op, size_t opnd, sopno pos) { sopno sn; sop s; @@ -1680,14 +1495,10 @@ sopno pos; } /* - - dofwd - complete a forward reference - == static void dofwd(struct parse *p, sopno pos, sop value); + * dofwd - complete a forward reference */ static void -dofwd(p, pos, value) -struct parse *p; -sopno pos; -sop value; +dofwd(struct parse *p, sopno pos, sop value) { /* avoid making error situations worse */ if (p->error != 0) @@ -1698,13 +1509,10 @@ sop value; } /* - - enlarge - enlarge the strip - == static void enlarge(struct parse *p, sopno size); + * enlarge - enlarge the strip */ static void -enlarge(p, size) -struct parse *p; -sopno size; +enlarge(struct parse *p, sopno size) { sop *sp; @@ -1722,13 +1530,10 @@ sopno size; } /* - - stripsnug - compact the strip - == static void stripsnug(struct parse *p, struct re_guts *g); + * stripsnug - compact the strip */ static void -stripsnug(p, g) -struct parse *p; -struct re_guts *g; +stripsnug(struct parse *p, struct re_guts *g) { g->nstates = p->slen; g->strip = (sop *) realloc((char *) p->strip, p->slen * sizeof(sop)); @@ -1740,8 +1545,7 @@ struct re_guts *g; } /* - - findmust - fill in must and mlen with longest mandatory literal string - == static void findmust(struct parse *p, struct re_guts *g); + * findmust - fill in must and mlen with longest mandatory literal string * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences. Someday. This code is simple and finds most @@ -1750,9 +1554,7 @@ struct re_guts *g; * Note that must and mlen got initialized during setup. */ static void -findmust(p, g) -struct parse *p; -struct re_guts *g; +findmust(struct parse *p, struct re_guts *g) { sop *scan; sop *start = 0; @@ -1838,13 +1640,10 @@ struct re_guts *g; } /* - - pluscount - count + nesting - == static sopno pluscount(struct parse *p, struct re_guts *g); + * pluscount - count + nesting */ static sopno /* nesting depth */ -pluscount(p, g) -struct parse *p; -struct re_guts *g; +pluscount(struct parse *p, struct re_guts *g) { sop *scan; sop s; @@ -1876,7 +1675,7 @@ struct re_guts *g; } /* - * some ctype functions with none-ascii-char guard + * some ctype functions with non-ascii-char guard */ static int pg_isdigit(int c) diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index d44b6db57c..b9b7ada250 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -37,59 +37,19 @@ * @(#)regerror.c 8.4 (Berkeley) 3/20/94 */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94"; - -#endif /* LIBC_SCCS and not lint */ - #include "postgres.h" #include -#include -#include #include #include -#include #include #include "regex/regex.h" #include "regex/utils.h" #include "regex/regex2.h" -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" -{ -#endif - -/* === regerror.c === */ - static char *regatoi(const regex_t *preg, char *localbuf); +static char *regatoi(const regex_t *preg, char *localbuf); -#ifdef __cplusplus -} - -#endif -/* ========= end header generated by ./mkh ========= */ -/* - = #define REG_NOMATCH 1 - = #define REG_BADPAT 2 - = #define REG_ECOLLATE 3 - = #define REG_ECTYPE 4 - = #define REG_EESCAPE 5 - = #define REG_ESUBREG 6 - = #define REG_EBRACK 7 - = #define REG_EPAREN 8 - = #define REG_EBRACE 9 - = #define REG_BADBR 10 - = #define REG_ERANGE 11 - = #define REG_ESPACE 12 - = #define REG_BADRPT 13 - = #define REG_EMPTY 14 - = #define REG_ASSERT 15 - = #define REG_INVARG 16 - = #define REG_ATOI 255 // convert name to number (!) - = #define REG_ITOA 0400 // convert number to name (!) - */ static struct rerr { int code; @@ -152,16 +112,12 @@ static struct rerr }; /* - - regerror - the interface to error numbers - = extern size_t regerror(int, const regex_t *, char *, size_t); + * regerror - the interface to error numbers */ /* ARGSUSED */ size_t -pg95_regerror(errcode, preg, errbuf, errbuf_size) -int errcode; -const regex_t *preg; -char *errbuf; -size_t errbuf_size; +pg95_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size) { struct rerr *r; size_t len; @@ -206,13 +162,10 @@ size_t errbuf_size; } /* - - regatoi - internal routine to implement REG_ATOI - == static char *regatoi(const regex_t *preg, char *localbuf); + * regatoi - internal routine to implement REG_ATOI */ static char * -regatoi(preg, localbuf) -const regex_t *preg; -char *localbuf; +regatoi(const regex_t *preg, char *localbuf) { struct rerr *r; diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index c5a45b3889..2d87d249bf 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -37,11 +37,6 @@ * @(#)regexec.c 8.3 (Berkeley) 3/20/94 */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; - -#endif /* LIBC_SCCS and not lint */ - #include "postgres.h" /* @@ -52,9 +47,6 @@ static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94"; * representations for state sets. */ #include -#include -#include -#include #include #include #include @@ -69,24 +61,24 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #define states long #define states1 states /* for later use in regexec() decision */ #define CLEAR(v) ((v) = 0) -#define SET0(v, n) ((v) &= ~(1 << (n))) -#define SET1(v, n) ((v) |= 1 << (n)) -#define ISSET(v, n) ((v) & (1 << (n))) +#define SET0(v, n) ((v) &= ~(1L << (n))) +#define SET1(v, n) ((v) |= (1L << (n))) +#define ISSET(v, n) ((v) & (1L << (n))) #define ASSIGN(d, s) ((d) = (s)) #define EQ(a, b) ((a) == (b)) #define STATEVARS int dummy /* dummy version */ #define STATESETUP(m, n) /* nothing */ #define STATETEARDOWN(m) /* nothing */ #define SETUP(v) ((v) = 0) -#define onestate int -#define INIT(o, n) ((o) = (unsigned)1 << (n)) -#define INC(o) ((o) <<= 1) -#define ISSTATEIN(v, o) ((v) & (o)) +#define onestate long +#define INIT(o, n) ((o) = (1L << (n))) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) ((v) & (o)) /* some abbreviations; note that some of these know variable names! */ /* do "if I'm here, I can also be there" etc without branches */ -#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) -#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) -#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) +#define FWD(dst, src, n) ((dst) |= ((src) & (here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((src) & (here)) >> (n)) +#define ISSETBACK(v, n) ((v) & (here >> (n))) /* function names */ #define SNAMES /* engine.c looks after details */ @@ -129,7 +121,7 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) #define onestate int #define INIT(o, n) ((o) = (n)) -#define INC(o) ((o)++) +#define INC(o) ((o)++) #define ISSTATEIN(v, o) ((v)[o]) /* some abbreviations; note that some of these know variable names! */ /* do "if I'm here, I can also be there" etc without branches */ @@ -142,27 +134,14 @@ static int nope = 0; /* for use in asserts; shuts lint up */ #include "engine.c" /* - - regexec - interface for matching - = extern int regexec(const regex_t *, const char *, size_t, \ - = regmatch_t [], int); - = #define REG_NOTBOL 00001 - = #define REG_NOTEOL 00002 - = #define REG_STARTEND 00004 - = #define REG_TRACE 00400 // tracing of execution - = #define REG_LARGE 01000 // force large representation - = #define REG_BACKR 02000 // force use of backref code + * regexec - interface for matching * * We put this here so we can exploit knowledge of the state representation - * when choosing which matcher to call. Also, by this point the matchers - * have been prototyped. + * when choosing which matcher to call. */ int /* 0 success, REG_NOMATCH failure */ -pg95_regexec(preg, string, nmatch, pmatch, eflags) -const regex_t *preg; -const char *string; -size_t nmatch; -regmatch_t *pmatch; -int eflags; +pg95_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t *pmatch, int eflags) { struct re_guts *g = preg->re_g; diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c index 55d5c62810..87e0c99ef9 100644 --- a/src/backend/regex/regfree.c +++ b/src/backend/regex/regfree.c @@ -37,28 +37,19 @@ * @(#)regfree.c 8.3 (Berkeley) 3/20/94 */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94"; - -#endif /* LIBC_SCCS and not lint */ - #include "postgres.h" #include -#include -#include #include "regex/regex.h" #include "regex/utils.h" #include "regex/regex2.h" /* - - regfree - free everything - = extern void regfree(regex_t *); + * regfree - free everything */ void -pg95_regfree(preg) -regex_t *preg; +pg95_regfree(regex_t *preg) { struct re_guts *g; diff --git a/src/include/regex/cclass.h b/src/include/regex/cclass.h index 2ab5eca33f..8290810259 100644 --- a/src/include/regex/cclass.h +++ b/src/include/regex/cclass.h @@ -94,6 +94,6 @@ static struct cclass "" }, { - NULL, 0, "" + NULL, NULL, "" } }; diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index b0b86f3cb6..b0c136f83d 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -102,10 +102,12 @@ typedef struct #define REG_LARGE 01000 /* force large representation */ #define REG_BACKR 02000 /* force use of backref code */ -int pg95_regcomp(regex_t *, const char *, int); -size_t pg95_regerror(int, const regex_t *, char *, size_t); -int pg95_regexec(const regex_t *, - const char *, size_t, regmatch_t[], int); -void pg95_regfree(regex_t *); +extern int pg95_regcomp(regex_t *preg, const char *pattern, int cflags); +extern size_t pg95_regerror(int errcode, const regex_t *preg, + char *errbuf, size_t errbuf_size); +extern int pg95_regexec(const regex_t *preg, const char *string, + size_t nmatch, + regmatch_t *pmatch, int eflags); +extern void pg95_regfree(regex_t *preg); #endif /* !_REGEX_H_ */ diff --git a/src/include/regex/regex2.h b/src/include/regex/regex2.h index b885ff4318..8bcdddb65a 100644 --- a/src/include/regex/regex2.h +++ b/src/include/regex/regex2.h @@ -39,22 +39,6 @@ #include -/* - * First, the stuff that ends up in the outside-world include file -*/ -/* - typedef off_t regoff_t; - typedef struct { - int re_magic; - size_t re_nsub; // number of parenthesized subexpressions - const char *re_endp; // end pointer for REG_PEND - struct re_guts *re_g; // none of your business :-) - } regex_t; - typedef struct { - regoff_t rm_so; // start of match - regoff_t rm_eo; // end of match - } regmatch_t; -*/ /* * internals of regex_t */ @@ -82,8 +66,8 @@ typedef unsigned long sop; /* strip operator */ typedef long sopno; -#define OPRMASK 0xf8000000 -#define OPDMASK 0x07ffffff +#define OPRMASK ((sop) 0xf8000000) +#define OPDMASK ((sop) 0x07ffffff) #define OPSHIFT ((unsigned)27) #define OP(n) ((n)&OPRMASK) #define OPND(n) ((n)&OPDMASK)