From df1e965e12cdd48c11057ee6e15346ee2b8b02f5 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 14 Feb 2008 17:33:37 +0000 Subject: [PATCH] Sync our regex code with upstream changes since last time we did this, which was Tcl 8.4.8. The main changes are to remove the never-fully-implemented code for multi-character collating elements, and to const-ify some stuff a bit more fully. In combination with the recent security patch, this commit brings us into line with Tcl 8.5.0. Note that I didn't make any effort to duplicate a lot of cosmetic changes that they made to bring their copy into line with their own style guidelines, such as adding braces around single-line IF bodies. Most of those we either had done already (such as ANSI-fication of function headers) or there is no point because pgindent would undo the change anyway. --- src/backend/regex/regc_color.c | 35 +--- src/backend/regex/regc_cvec.c | 93 ++------- src/backend/regex/regc_lex.c | 32 +-- src/backend/regex/regc_locale.c | 90 +++----- src/backend/regex/regc_nfa.c | 69 +++--- src/backend/regex/regcomp.c | 360 +++++--------------------------- src/backend/regex/regerror.c | 8 +- src/include/regex/regcustom.h | 6 +- src/include/regex/regguts.h | 18 +- 9 files changed, 165 insertions(+), 546 deletions(-) diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 87eb1e4958..e15fd4b788 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.8 2008/01/03 20:47:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.9 2008/02/14 17:33:37 tgl Exp $ * * * Note that there are some incestuous relationships between this code and @@ -222,7 +222,6 @@ static color /* COLORLESS for error */ newcolor(struct colormap * cm) { struct colordesc *cd; - struct colordesc *new; size_t n; if (CISERR()) @@ -245,24 +244,25 @@ newcolor(struct colormap * cm) else { /* oops, must allocate more */ + struct colordesc *newCd; + n = cm->ncds * 2; if (cm->cd == cm->cdspace) { - new = (struct colordesc *) MALLOC(n * - sizeof(struct colordesc)); - if (new != NULL) - memcpy(VS(new), VS(cm->cdspace), cm->ncds * + newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc)); + if (newCd != NULL) + memcpy(VS(newCd), VS(cm->cdspace), cm->ncds * sizeof(struct colordesc)); } else - new = (struct colordesc *) REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) + newCd = (struct colordesc *) + REALLOC(cm->cd, n * sizeof(struct colordesc)); + if (newCd == NULL) { CERR(REG_ESPACE); return COLORLESS; } - cm->cd = new; + cm->cd = newCd; cm->ncds = n; assert(cm->max < cm->ncds - 1); cm->max++; @@ -634,21 +634,6 @@ uncolorchain(struct colormap * cm, a->colorchainRev = NULL; } -/* - * singleton - is this character in its own color? - */ -static int /* predicate */ -singleton(struct colormap * cm, - chr c) -{ - color co; /* color of c */ - - co = GETCOLOR(cm, c); - if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) - return 1; - return 0; -} - /* * rainbow - add arcs of all full colors (but one) between specified states */ diff --git a/src/backend/regex/regc_cvec.c b/src/backend/regex/regc_cvec.c index 719c4c5ef3..25bfae3e32 100644 --- a/src/backend/regex/regc_cvec.c +++ b/src/backend/regex/regc_cvec.c @@ -28,33 +28,31 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.5 2005/10/15 02:49:24 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_cvec.c,v 1.6 2008/02/14 17:33:37 tgl Exp $ * */ +/* + * Notes: + * Only (selected) functions in _this_ file should treat chr* as non-constant. + */ + /* * newcvec - allocate a new cvec */ static struct cvec * newcvec(int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ + int nranges) /* ... and this many ranges */ { - size_t n; - size_t nc; - struct cvec *cv; + size_t nc = (size_t) nchrs + (size_t) nranges * 2; + size_t n = sizeof(struct cvec) + nc * sizeof(chr); + struct cvec *cv = (struct cvec *) MALLOC(n); - nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2; - - n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *) - + nc * sizeof(chr); - cv = (struct cvec *) MALLOC(n); if (cv == NULL) return NULL; cv->chrspace = nchrs; - cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1); + cv->chrs = (chr *) (((char *) cv) + sizeof(struct cvec)); + cv->ranges = cv->chrs + nchrs; cv->rangespace = nranges; return clearcvec(cv); } @@ -66,17 +64,9 @@ newcvec(int nchrs, /* to hold this many chrs... */ static struct cvec * clearcvec(struct cvec * cv) { - int i; - assert(cv != NULL); cv->nchrs = 0; - assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; cv->nranges = 0; - for (i = 0; i < cv->mccespace; i++) - cv->mcces[i] = NULL; - return cv; } @@ -87,7 +77,6 @@ static void addchr(struct cvec * cv, /* character vector */ chr c) /* character to add */ { - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); cv->chrs[cv->nchrs++] = (chr) c; } @@ -105,73 +94,21 @@ addrange(struct cvec * cv, /* character vector */ cv->nranges++; } -/* - * addmcce - add an MCCE to a cvec - */ -static void -addmcce(struct cvec * cv, /* character vector */ - chr *startp, /* beginning of text */ - chr *endp) /* just past end of text */ -{ - int len; - int i; - chr *s; - chr *d; - - if (startp == NULL && endp == NULL) - return; - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) - *d++ = *s; - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; -} - -/* - * haschr - does a cvec contain this chr? - */ -static int /* predicate */ -haschr(struct cvec * cv, /* character vector */ - chr c) /* character to test for */ -{ - int i; - chr *p; - - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - { - if (*p == c) - return 1; - } - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - { - if ((*p <= c) && (c <= *(p + 1))) - return 1; - } - return 0; -} - /* * getcvec - get a cvec, remembering it as v->cv */ static struct cvec * getcvec(struct vars * v, /* context */ int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ + int nranges) /* ... and this many ranges */ { if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) + nranges <= v->cv->rangespace) return clearcvec(v->cv); if (v->cv != NULL) freecvec(v->cv); - v->cv = newcvec(nchrs, nranges, nmcces); + v->cv = newcvec(nchrs, nranges); if (v->cv == NULL) ERR(REG_ESPACE); diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index fc86ca322a..d2fc591ed8 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.7 2008/01/03 20:47:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.8 2008/02/14 17:33:37 tgl Exp $ * */ @@ -201,8 +201,8 @@ prefixes(struct vars * v) */ static void lexnest(struct vars * v, - chr *beginp, /* start of interpolation */ - chr *endp) /* one past end of interpolation */ + const chr *beginp, /* start of interpolation */ + const chr *endp) /* one past end of interpolation */ { assert(v->savenow == NULL); /* only one level of nesting */ v->savenow = v->now; @@ -214,47 +214,47 @@ lexnest(struct vars * v, /* * string constants to interpolate as expansions of things like \d */ -static chr backd[] = { /* \d */ +static const chr backd[] = { /* \d */ CHR('['), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr backD[] = { /* \D */ +static const chr backD[] = { /* \D */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbackd[] = { /* \d within brackets */ +static const chr brbackd[] = { /* \d within brackets */ CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']') }; -static chr backs[] = { /* \s */ +static const chr backs[] = { /* \s */ CHR('['), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr backS[] = { /* \S */ +static const chr backS[] = { /* \S */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbacks[] = { /* \s within brackets */ +static const chr brbacks[] = { /* \s within brackets */ CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']') }; -static chr backw[] = { /* \w */ +static const chr backw[] = { /* \w */ CHR('['), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr backW[] = { /* \W */ +static const chr backW[] = { /* \W */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr brbackw[] = { /* \w within brackets */ +static const chr brbackw[] = { /* \w within brackets */ CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_') @@ -722,7 +722,7 @@ lexescape(struct vars * v) static chr esc[] = { CHR('E'), CHR('S'), CHR('C') }; - chr *save; + const chr *save; assert(v->cflags & REG_ADVF); @@ -1080,7 +1080,7 @@ brenext(struct vars * v, static void skip(struct vars * v) { - chr *start = v->now; + const chr *start = v->now; assert(v->cflags & REG_EXPANDED); @@ -1119,8 +1119,8 @@ newline(void) */ static chr chrnamed(struct vars * v, - chr *startp, /* start of name */ - chr *endp, /* just past end of name */ + const chr *startp, /* start of name */ + const chr *endp, /* just past end of name */ chr lastresort) /* what to return if name lookup fails */ { celt c; diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c index 09c1bd127d..28f5e7ca12 100644 --- a/src/backend/regex/regc_locale.c +++ b/src/backend/regex/regc_locale.c @@ -47,15 +47,15 @@ * permission to use and distribute the software in accordance with the * terms specified in this license. * - * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.8 2005/11/22 18:17:19 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_locale.c,v 1.9 2008/02/14 17:33:37 tgl Exp $ */ /* ASCII character-name table */ -static struct cname +static const struct cname { - char *name; - char code; + const char *name; + const char code; } cnames[] = { @@ -423,46 +423,15 @@ pg_wc_tolower(pg_wchar c) } -/* - * nmcces - how many distinct MCCEs are there? - */ -static int -nmcces(struct vars * v) -{ - /* - * No multi-character collating elements defined at the moment. - */ - return 0; -} - -/* - * nleaders - how many chrs can be first chrs of MCCEs? - */ -static int -nleaders(struct vars * v) -{ - return 0; -} - -/* - * allmcces - return a cvec with all the MCCEs of the locale - */ -static struct cvec * -allmcces(struct vars * v, /* context */ - struct cvec * cv) /* this is supposed to have enough room */ -{ - return clearcvec(cv); -} - /* * element - map collating-element name to celt */ static celt element(struct vars * v, /* context */ - chr *startp, /* points to start of name */ - chr *endp) /* points just past end of name */ + const chr *startp, /* points to start of name */ + const chr *endp) /* points just past end of name */ { - struct cname *cn; + const struct cname *cn; size_t len; /* generic: one-chr names stand for themselves */ @@ -513,7 +482,7 @@ range(struct vars * v, /* context */ if (!cases) { /* easy version */ - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1); NOERRN(); addrange(cv, a, b); return cv; @@ -527,7 +496,7 @@ range(struct vars * v, /* context */ nchrs = (b - a + 1) * 2 + 4; - cv = getcvec(v, nchrs, 0, 0); + cv = getcvec(v, nchrs, 0); NOERRN(); for (c = a; c <= b; c++) @@ -550,7 +519,6 @@ range(struct vars * v, /* context */ static int /* predicate */ before(celt x, celt y) { - /* trivial because no MCCEs */ if (x < y) return 1; return 0; @@ -571,7 +539,7 @@ eclass(struct vars * v, /* context */ /* crude fake equivalence class for testing */ if ((v->cflags & REG_FAKE) && c == 'x') { - cv = getcvec(v, 4, 0, 0); + cv = getcvec(v, 4, 0); addchr(cv, (chr) 'x'); addchr(cv, (chr) 'y'); if (cases) @@ -585,7 +553,7 @@ eclass(struct vars * v, /* context */ /* otherwise, none */ if (cases) return allcases(v, c); - cv = getcvec(v, 1, 0, 0); + cv = getcvec(v, 1, 0); assert(cv != NULL); addchr(cv, (chr) c); return cv; @@ -598,13 +566,13 @@ eclass(struct vars * v, /* context */ */ static struct cvec * cclass(struct vars * v, /* context */ - chr *startp, /* where the name starts */ - chr *endp, /* just past the end of the name */ + const chr *startp, /* where the name starts */ + const chr *endp, /* just past the end of the name */ int cases) /* case-independent? */ { size_t len; struct cvec *cv = NULL; - char **namePtr; + const char **namePtr; int i, index; @@ -612,7 +580,7 @@ cclass(struct vars * v, /* context */ * The following arrays define the valid character class names. */ - static char *classNames[] = { + static const char *classNames[] = { "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "xdigit", NULL }; @@ -662,7 +630,7 @@ cclass(struct vars * v, /* context */ switch ((enum classes) index) { case CC_PRINT: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -673,7 +641,7 @@ cclass(struct vars * v, /* context */ } break; case CC_ALNUM: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -684,7 +652,7 @@ cclass(struct vars * v, /* context */ } break; case CC_ALPHA: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -695,27 +663,27 @@ cclass(struct vars * v, /* context */ } break; case CC_ASCII: - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1); if (cv) addrange(cv, 0, 0x7f); break; case CC_BLANK: - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0); addchr(cv, '\t'); addchr(cv, ' '); break; case CC_CNTRL: - cv = getcvec(v, 0, 2, 0); + cv = getcvec(v, 0, 2); addrange(cv, 0x0, 0x1f); addrange(cv, 0x7f, 0x9f); break; case CC_DIGIT: - cv = getcvec(v, 0, 1, 0); + cv = getcvec(v, 0, 1); if (cv) addrange(cv, (chr) '0', (chr) '9'); break; case CC_PUNCT: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -726,7 +694,7 @@ cclass(struct vars * v, /* context */ } break; case CC_XDIGIT: - cv = getcvec(v, 0, 3, 0); + cv = getcvec(v, 0, 3); if (cv) { addrange(cv, '0', '9'); @@ -735,7 +703,7 @@ cclass(struct vars * v, /* context */ } break; case CC_SPACE: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -746,7 +714,7 @@ cclass(struct vars * v, /* context */ } break; case CC_LOWER: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -757,7 +725,7 @@ cclass(struct vars * v, /* context */ } break; case CC_UPPER: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -768,7 +736,7 @@ cclass(struct vars * v, /* context */ } break; case CC_GRAPH: - cv = getcvec(v, UCHAR_MAX, 0, 0); + cv = getcvec(v, UCHAR_MAX, 0); if (cv) { for (i = 0; i <= UCHAR_MAX; i++) @@ -802,7 +770,7 @@ allcases(struct vars * v, /* context */ lc = pg_wc_tolower((chr) c); uc = pg_wc_toupper((chr) c); - cv = getcvec(v, 2, 0, 0); + cv = getcvec(v, 2, 0); addchr(cv, lc); if (lc != uc) addchr(cv, uc); diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index ea382df7f9..4e307f5c15 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.5 2008/01/03 20:47:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.6 2008/02/14 17:33:37 tgl Exp $ * * * One or two things that technically ought to be in here @@ -349,8 +349,6 @@ newarc(struct nfa * nfa, if (COLORED(a) && nfa->parent == NULL) colorchain(nfa->cm, a); - - return; } /* @@ -361,8 +359,6 @@ allocarc(struct nfa * nfa, struct state * s) { struct arc *a; - struct arcbatch *new; - int i; /* shortcut */ if (s->free == NULL && s->noas < ABSIZE) @@ -375,22 +371,25 @@ allocarc(struct nfa * nfa, /* if none at hand, get more */ if (s->free == NULL) { - new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); - if (new == NULL) + struct arcbatch *newAb; + int i; + + newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); + if (newAb == NULL) { NERR(REG_ESPACE); return NULL; } - new->next = s->oas.next; - s->oas.next = new; + newAb->next = s->oas.next; + s->oas.next = newAb; for (i = 0; i < ABSIZE; i++) { - new->a[i].type = 0; - new->a[i].freechain = &new->a[i + 1]; + newAb->a[i].type = 0; + newAb->a[i].freechain = &newAb->a[i + 1]; } - new->a[ABSIZE - 1].freechain = NULL; - s->free = &new->a[0]; + newAb->a[ABSIZE - 1].freechain = NULL; + s->free = &newAb->a[0]; } assert(s->free != NULL); @@ -495,20 +494,20 @@ cparc(struct nfa * nfa, */ static void moveins(struct nfa * nfa, - struct state * old, - struct state * new) + struct state * oldState, + struct state * newState) { struct arc *a; - assert(old != new); + assert(oldState != newState); - while ((a = old->ins) != NULL) + while ((a = oldState->ins) != NULL) { - cparc(nfa, a, a->from, new); + cparc(nfa, a, a->from, newState); freearc(nfa, a); } - assert(old->nins == 0); - assert(old->ins == NULL); + assert(oldState->nins == 0); + assert(oldState->ins == NULL); } /* @@ -516,15 +515,15 @@ moveins(struct nfa * nfa, */ static void copyins(struct nfa * nfa, - struct state * old, - struct state * new) + struct state * oldState, + struct state * newState) { struct arc *a; - assert(old != new); + assert(oldState != newState); - for (a = old->ins; a != NULL; a = a->inchain) - cparc(nfa, a, a->from, new); + for (a = oldState->ins; a != NULL; a = a->inchain) + cparc(nfa, a, a->from, newState); } /* @@ -532,16 +531,16 @@ copyins(struct nfa * nfa, */ static void moveouts(struct nfa * nfa, - struct state * old, - struct state * new) + struct state * oldState, + struct state * newState) { struct arc *a; - assert(old != new); + assert(oldState != newState); - while ((a = old->outs) != NULL) + while ((a = oldState->outs) != NULL) { - cparc(nfa, a, new, a->to); + cparc(nfa, a, newState, a->to); freearc(nfa, a); } } @@ -551,15 +550,15 @@ moveouts(struct nfa * nfa, */ static void copyouts(struct nfa * nfa, - struct state * old, - struct state * new) + struct state * oldState, + struct state * newState) { struct arc *a; - assert(old != new); + assert(oldState != newState); - for (a = old->outs; a != NULL; a = a->outchain) - cparc(nfa, a, new, a->to); + for (a = oldState->outs; a != NULL; a = a->outchain) + cparc(nfa, a, newState, a->to); } /* diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 1eaca67b9a..89f4f6f737 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.45 2007/10/06 16:05:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regcomp.c,v 1.46 2008/02/14 17:33:37 tgl Exp $ * */ @@ -51,11 +51,9 @@ static void repeat(struct vars *, struct state *, struct state *, int, int); static void bracket(struct vars *, struct state *, struct state *); static void cbracket(struct vars *, struct state *, struct state *); static void brackpart(struct vars *, struct state *, struct state *); -static chr *scanplain(struct vars *); -static void leaders(struct vars *, struct cvec *); +static const chr *scanplain(struct vars *); static void onechr(struct vars *, chr, struct state *, struct state *); static void dovec(struct vars *, struct cvec *, struct state *, struct state *); -static celt nextleader(struct vars *, chr, chr); static void wordchrs(struct vars *); static struct subre *subre(struct vars *, int, int, struct state *, struct state *); static void freesubre(struct vars *, struct subre *); @@ -74,12 +72,12 @@ static void rfree(regex_t *); static void dump(regex_t *, FILE *); static void dumpst(struct subre *, FILE *, int); static void stdump(struct subre *, FILE *, int); -static char *stid(struct subre *, char *, size_t); +static const char *stid(struct subre *, char *, size_t); #endif /* === regc_lex.c === */ static void lexstart(struct vars *); static void prefixes(struct vars *); -static void lexnest(struct vars *, chr *, chr *); +static void lexnest(struct vars *, const chr *, const chr *); static void lexword(struct vars *); static int next(struct vars *); static int lexescape(struct vars *); @@ -87,7 +85,7 @@ static chr lexdigits(struct vars *, int, int, int); static int brenext(struct vars *, chr); static void skip(struct vars *); static chr newline(void); -static chr chrnamed(struct vars *, chr *, chr *, chr); +static chr chrnamed(struct vars *, const chr *, const chr *, chr); /* === regc_color.c === */ static void initcm(struct vars *, struct colormap *); @@ -105,7 +103,6 @@ static void subblock(struct vars *, chr, struct state *, struct state *); static void okcolors(struct nfa *, struct colormap *); static void colorchain(struct colormap *, struct arc *); static void uncolorchain(struct colormap *, struct arc *); -static int singleton(struct colormap *, chr c); static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); @@ -168,13 +165,11 @@ static void dumpcnfa(struct cnfa *, FILE *); static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ -static struct cvec *newcvec(int, int, int); +static struct cvec *newcvec(int, int); static struct cvec *clearcvec(struct cvec *); static void addchr(struct cvec *, chr); static void addrange(struct cvec *, chr, chr); -static void addmcce(struct cvec *, chr *, chr *); -static int haschr(struct cvec *, chr); -static struct cvec *getcvec(struct vars *, int, int, int); +static struct cvec *getcvec(struct vars *, int, int); static void freecvec(struct cvec *); /* === regc_locale.c === */ @@ -189,14 +184,11 @@ static int pg_wc_ispunct(pg_wchar c); static int pg_wc_isspace(pg_wchar c); static pg_wchar pg_wc_toupper(pg_wchar c); static pg_wchar pg_wc_tolower(pg_wchar c); -static int nmcces(struct vars *); -static int nleaders(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -static celt element(struct vars *, chr *, chr *); +static celt element(struct vars *, const chr *, const chr *); static struct cvec *range(struct vars *, celt, celt, int); static int before(celt, celt); static struct cvec *eclass(struct vars *, celt, int); -static struct cvec *cclass(struct vars *, chr *, chr *, int); +static struct cvec *cclass(struct vars *, const chr *, const chr *, int); static struct cvec *allcases(struct vars *, chr); static int cmp(const chr *, const chr *, size_t); static int casecmp(const chr *, const chr *, size_t); @@ -206,10 +198,10 @@ static int casecmp(const chr *, const chr *, size_t); struct vars { regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for "subroutine call" */ - chr *savestop; + const chr *now; /* scan pointer into string */ + const chr *stop; /* end of string */ + const chr *savenow; /* saved now and stop for "subroutine call" */ + const chr *savestop; int err; /* error code (0 if none) */ int cflags; /* copy of compile flags */ int lasttype; /* type of previous token */ @@ -230,10 +222,6 @@ struct vars int ntree; /* number of tree nodes */ struct cvec *cv; /* interface cvec */ struct cvec *cv2; /* utility cvec */ - struct cvec *mcces; /* collating-element information */ -#define ISCELEADER(v,c) ((v)->mcces != NULL && haschr((v)->mcces, (c))) - struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ struct subre *lacons; /* lookahead-constraint vector */ int nlacons; /* size of lacons */ }; @@ -275,9 +263,8 @@ struct vars #define PREFER 'P' /* length preference */ /* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ - (a)->type == BEHIND) - +#define COLORED(a) \ + ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND) /* static function list */ @@ -322,7 +309,7 @@ pg_regcomp(regex_t *re, /* initial setup (after which freev() is callable) */ v->re = re; - v->now = (chr *) string; + v->now = string; v->stop = v->now + len; v->savenow = v->savestop = NULL; v->err = 0; @@ -341,7 +328,6 @@ pg_regcomp(regex_t *re, v->treefree = NULL; v->cv = NULL; v->cv2 = NULL; - v->mcces = NULL; v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; @@ -363,19 +349,9 @@ pg_regcomp(regex_t *re, ZAPCNFA(g->search); v->nfa = newnfa(v, v->cm, (struct nfa *) NULL); CNOERR(); - v->cv = newcvec(100, 20, 10); + v->cv = newcvec(100, 20); if (v->cv == NULL) return freev(v, REG_ESPACE); - i = nmcces(v); - if (i > 0) - { - v->mcces = newcvec(nleaders(v), 0, i); - CNOERR(); - v->mcces = allmcces(v, v->mcces); - leaders(v, v->mcces); - addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */ - } - CNOERR(); /* parsing */ lexstart(v); /* also handles prefixes */ @@ -525,8 +501,6 @@ freev(struct vars * v, freecvec(v->cv); if (v->cv2 != NULL) freecvec(v->cv2); - if (v->mcces != NULL) - freecvec(v->mcces); if (v->lacons != NULL) freelacons(v->lacons, v->nlacons); ERR(err); /* nop if err==0 */ @@ -583,15 +557,14 @@ makesearch(struct vars * v, for (b = s->ins; b != NULL; b = b->inchain) if (b->from != pre) break; - if (b != NULL) - { /* must be split */ - if (s->tmp == NULL) - { /* if not already in the list */ - /* (fixes bugs 505048, 230589, */ - /* 840258, 504785) */ - s->tmp = slist; - slist = s; - } + if (b != NULL && s->tmp == NULL) + { + /* + * Must be split if not already in the list (fixes bugs 505048, + * 230589, 840258, 504785). + */ + s->tmp = slist; + slist = s; } } @@ -1338,13 +1311,6 @@ cbracket(struct vars * v, { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); - struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - chr *p; - int i; NOERR(); bracket(v, left, right); @@ -1354,65 +1320,13 @@ cbracket(struct vars * v, assert(lp->nouts == 0); /* all outarcs will be ours */ - /* easy part of complementing */ + /* + * Easy part of complementing, and all there is to do since the MCCE code + * was removed. + */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (v->mcces == NULL) - { /* no MCCEs -- we're done */ - dropstate(v->nfa, left); - assert(right->nins == 0); - freestate(v->nfa, right); - return; - } - - /* but complementing gets messy in the presence of MCCEs... */ - NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) - { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - ba = findarc(left, PLAIN, co); - if (ba == NULL) - { - assert(a != NULL); - freearc(v->nfa, a); - } - else - assert(a == NULL); - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - if (ba == NULL) - { /* easy case, need all of them */ - cloneouts(v->nfa, pa->to, s, rp, PLAIN); - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } - else - { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) - { - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); - } - for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) - if (findarc(ba->to, PLAIN, pa->co) == NULL) - newarc(v->nfa, PLAIN, pa->co, s, rp); - if (s->nouts == 0) /* limit of selectivity: none */ - dropstate(v->nfa, s); /* frees arc too */ - } - NOERR(); - } - - delsub(v->nfa, left, right); - assert(left->nouts == 0); - freestate(v->nfa, left); + dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); } @@ -1428,8 +1342,8 @@ brackpart(struct vars * v, celt startc; celt endc; struct cvec *cv; - chr *startp; - chr *endp; + const chr *startp; + const chr *endp; chr c[1]; /* parse something, get rid of special cases, take shortcuts */ @@ -1442,8 +1356,8 @@ brackpart(struct vars * v, case PLAIN: c[0] = v->nextvalue; NEXT(); - /* shortcut for ordinary chr (not range, not MCCE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) + /* shortcut for ordinary chr (not range) */ + if (!SEE(RANGE)) { onechr(v, c[0], lp, rp); return; @@ -1533,10 +1447,10 @@ brackpart(struct vars * v, * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. */ -static chr * /* just after end of sequence */ +static const chr * /* just after end of sequence */ scanplain(struct vars * v) { - chr *endp; + const chr *endp; assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); NEXT(); @@ -1554,52 +1468,6 @@ scanplain(struct vars * v) return endp; } -/* - * leaders - process a cvec of collating elements to also include leaders - * Also gives all characters involved their own colors, which is almost - * certainly necessary, and sets up little disconnected subNFA. - */ -static void -leaders(struct vars * v, - struct cvec * cv) -{ - int mcce; - chr *p; - chr leader; - struct state *s; - struct arc *a; - - v->mccepbegin = newstate(v->nfa); - v->mccepend = newstate(v->nfa); - NOERR(); - - for (mcce = 0; mcce < cv->nmcces; mcce++) - { - p = cv->mcces[mcce]; - leader = *p; - if (!haschr(cv, leader)) - { - addchr(cv, leader); - s = newstate(v->nfa); - newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->mccepbegin, s); - okcolors(v->nfa, v->cm); - } - else - { - a = findarc(v->mccepbegin, PLAIN, - GETCOLOR(v->cm, leader)); - assert(a != NULL); - s = a->to; - assert(s != v->mccepend); - } - p++; - assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for now */ - newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); - okcolors(v->nfa, v->cm); - } -} - /* * onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. @@ -1622,7 +1490,6 @@ onechr(struct vars * v, /* * dovec - fill in arcs for each element of a cvec - * This one has to handle the messy cases, like MCCEs and MCCE leaders. */ static void dovec(struct vars * v, @@ -1633,47 +1500,14 @@ dovec(struct vars * v, chr ch, from, to; - celt ce; - chr *p; + const chr *p; int i; - color co; - struct cvec *leads; - struct arc *a; - struct arc *pa; /* arc in prototype */ - struct state *s; - struct state *ps; /* state in prototype */ - - /* need a place to store leaders, if any */ - if (nmcces(v) > 0) - { - assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) - { - if (v->cv2 != NULL) - free(v->cv2); - v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); - NOERR(); - leads = v->cv2; - } - else - leads = clearcvec(v->cv2); - } - else - leads = NULL; - /* first, get the ordinary characters out of the way */ + /* ordinary characters */ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { ch = *p; - if (!ISCELEADER(v, ch)) - newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - else - { - assert(singleton(v->cm, ch)); - assert(leads != NULL); - if (!haschr(leads, ch)) - addchr(leads, ch); - } + newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); } /* and the ranges */ @@ -1681,103 +1515,9 @@ dovec(struct vars * v, { from = *p; to = *(p + 1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) - { - if (from < ce) - subrange(v, from, ce - 1, lp, rp); - assert(singleton(v->cm, ce)); - assert(leads != NULL); - if (!haschr(leads, ce)) - addchr(leads, ce); - from = ce + 1; - } if (from <= to) subrange(v, from, to, lp, rp); } - - if ((leads == NULL || leads->nchrs == 0) && cv->nmcces == 0) - return; - - /* deal with the MCCE leaders */ - NOTE(REG_ULOCALE); - for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) - { - co = GETCOLOR(v->cm, *p); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else - { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - pa = findarc(v->mccepbegin, PLAIN, co); - assert(pa != NULL); - ps = pa->to; - newarc(v->nfa, '$', 1, s, rp); - newarc(v->nfa, '$', 0, s, rp); - colorcomplement(v->nfa, v->cm, AHEAD, ps, s, rp); - NOERR(); - } - - /* and the MCCEs */ - for (i = 0; i < cv->nmcces; i++) - { - p = cv->mcces[i]; - assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) - { - ERR(REG_ASSERT); - return; - } - ch = *p++; - co = GETCOLOR(v->cm, ch); - a = findarc(lp, PLAIN, co); - if (a != NULL) - s = a->to; - else - { - s = newstate(v->nfa); - NOERR(); - newarc(v->nfa, PLAIN, co, lp, s); - NOERR(); - } - assert(*p != 0); /* at least two chars */ - assert(singleton(v->cm, *p)); - ch = *p++; - co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ - newarc(v->nfa, PLAIN, co, s, rp); - NOERR(); - } -} - -/* - * nextleader - find next MCCE leader within range - */ -static celt /* NOCELT means none */ -nextleader(struct vars * v, - chr from, - chr to) -{ - int i; - chr *p; - chr ch; - celt it = NOCELT; - - if (v->mcces == NULL) - return it; - - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) - { - ch = *p; - if (from <= ch && ch <= to) - if (it == NOCELT || ch < it) - it = ch; - } - return it; } /* @@ -1825,9 +1565,8 @@ subre(struct vars * v, struct state * begin, struct state * end) { - struct subre *ret; + struct subre *ret = v->treefree; - ret = v->treefree; if (ret != NULL) v->treefree = ret->left; else @@ -1906,14 +1645,13 @@ static void optst(struct vars * v, struct subre * t) { - if (t == NULL) - return; - - /* recurse through children */ - if (t->left != NULL) - optst(v, t->left); - if (t->right != NULL) - optst(v, t->right); + /* + * DGP (2007-11-13): I assume it was the programmer's intent to eventually + * come back and add code to optimize subRE trees, but the routine coded + * just spends effort traversing the tree and doing nothing. We can do + * nothing with less effort. + */ + return; } /* @@ -2207,8 +1945,8 @@ stdump(struct subre * t, { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); - fprintf(f, "\n"); } + fprintf(f, "\n"); if (t->left != NULL) stdump(t->left, f, nfapresent); if (t->right != NULL) @@ -2218,7 +1956,7 @@ stdump(struct subre * t, /* * stid - identify a subtree node for dumping */ -static char * /* points to buf or constant string */ +static const char * /* points to buf or constant string */ stid(struct subre * t, char *buf, size_t bufsize) diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index 3b20446350..dfcb462e01 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.27 2003/11/29 19:51:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regerror.c,v 1.28 2008/02/14 17:33:37 tgl Exp $ * */ @@ -40,8 +40,8 @@ static char unk[] = "*** unknown regex error code 0x%x ***"; static struct rerr { int code; - char *name; - char *explain; + const char *name; + const char *explain; } rerrs[] = { @@ -63,7 +63,7 @@ pg_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ size_t errbuf_size) /* available space in errbuf, can be 0 */ { struct rerr *r; - char *msg; + const char *msg; char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */ size_t len; int icode; diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h index 8471fb0b1c..269f926be8 100644 --- a/src/include/regex/regcustom.h +++ b/src/include/regex/regcustom.h @@ -25,7 +25,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.6 2007/10/06 16:01:51 tgl Exp $ + * $PostgreSQL: pgsql/src/include/regex/regcustom.h,v 1.7 2008/02/14 17:33:37 tgl Exp $ */ /* headers if any */ @@ -47,9 +47,9 @@ /* internal character type and related */ typedef pg_wchar chr; /* the type itself */ typedef unsigned uchr; /* unsigned type that will hold a chr */ -typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ +typedef int celt; /* type to hold chr, or NOCELT */ -#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ +#define NOCELT (-1) /* celt value which is not valid chr */ #define CHR(c) ((unsigned char) (c)) /* turn char literal into chr literal */ #define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ #define CHRBITS 32 /* bits in a chr; must not use sizeof */ diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 327808338a..52f2157535 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.6 2008/01/03 20:47:55 tgl Exp $ + * $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.7 2008/02/14 17:33:37 tgl Exp $ */ @@ -181,7 +181,7 @@ union tree #define tcolor colors.ccolor #define tptr ptrs.pptr -/* internal per-color structure for the color machinery */ +/* internal per-color descriptor structure for the color machinery */ struct colordesc { uchr nchrs; /* number of chars of this color */ @@ -228,11 +228,11 @@ struct colormap #endif - /* * Interface definitions for locale-interface functions in locale.c. - * Multi-character collating elements (MCCEs) cause most of the trouble. */ + +/* Representation of a set of characters. */ struct cvec { int nchrs; /* number of chrs */ @@ -241,17 +241,9 @@ struct cvec int nranges; /* number of ranges (chr pairs) */ int rangespace; /* number of chrs possible */ chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ + /* both batches of chrs are on the end */ }; -/* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ - - /* * definitions for NFA internal representation -- 2.40.0