From b44d92b67b65a76f92448b5a282aae72820ac676 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 16 Sep 2015 15:25:25 -0400 Subject: [PATCH] Sync regex code with Tcl 8.6.4. Sync our regex code with upstream changes since last time we did this, which was Tcl 8.5.11 (see commit 08fd6ff37f71485e2fc04bc6ce07d2a483c36702). The only functional change here is to disbelieve that an octal escape is three digits long if it would exceed \377. That's a bug fix, but it's a minor one and could change the interpretation of working regexes, so don't back-patch. In addition to that, s/INFINITY/DUPINF/ to eliminate the risk of collisions with <math.h>'s macro, and s/LOCAL/NOPROP/ because that also seems like an unnecessarily collision-prone macro name. There were some other cosmetic changes in their copy that I did not adopt, notably a rather half-hearted attempt at renaming some of the C functions in a more verbose style. (I'm not necessarily against the concept, but renaming just a few functions in the package is not an improvement.) --- src/backend/regex/regc_lex.c | 6 ++++++ src/backend/regex/regcomp.c | 14 +++++++------- src/backend/regex/regexec.c | 6 +++--- src/include/regex/regguts.h | 13 +++++-------- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 6f2c0cb3eb..f6ed9f09ea 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -860,6 +860,12 @@ lexescape(struct vars * v) c = lexdigits(v, 8, 1, 3); if (ISERR()) FAILW(REG_EESCAPE); + if (c > 0xff) + { + /* out of range, so we handled one digit too much */ + v->now--; + c >>= 3; + } RETV(PLAIN, c); break; default: diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 44a472fa69..6b95975f2b 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -960,13 +960,13 @@ parseqatom(struct vars * v, { case '*': m = 0; - n = INFINITY; + n = DUPINF; qprefer = (v->nextvalue) ? 
LONGER : SHORTER; NEXT(); break; case '+': m = 1; - n = INFINITY; + n = DUPINF; qprefer = (v->nextvalue) ? LONGER : SHORTER; NEXT(); break; @@ -984,7 +984,7 @@ parseqatom(struct vars * v, if (SEE(DIGIT)) n = scannum(v); else - n = INFINITY; + n = DUPINF; if (m > n) { ERR(REG_BADBR); @@ -1146,8 +1146,8 @@ parseqatom(struct vars * v, * really care where its submatches are. */ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); - assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1); + assert(m >= 1 && m != DUPINF && n >= 1); + repeat(v, s, atom->begin, m - 1, (n == DUPINF) ? n : n - 1); f = COMBINE(qprefer, atom->flags); t = subre(v, '.', f, s, atom->end); /* prefix and atom */ NOERR(); @@ -1268,7 +1268,7 @@ repeat(struct vars * v, #define SOME 2 #define INF 3 #define PAIR(x, y) ((x)*4 + (y)) -#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) +#define REDUCE(x) ( ((x) == DUPINF) ? INF : (((x) > 1) ? SOME : (x)) ) const int rm = REDUCE(m); const int rn = REDUCE(n); struct state *s; @@ -2026,7 +2026,7 @@ stdump(struct subre * t, if (t->min != 1 || t->max != 1) { fprintf(f, " {%d,", t->min); - if (t->max != INFINITY) + if (t->max != DUPINF) fprintf(f, "%d", t->max); fprintf(f, "}"); } diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 5e78f8149c..efd1e9ba02 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -865,7 +865,7 @@ cbrdissect(struct vars * v, if (tlen % brlen != 0) return REG_NOMATCH; numreps = tlen / brlen; - if (numreps < min || (numreps > max && max != INFINITY)) + if (numreps < min || (numreps > max && max != DUPINF)) return REG_NOMATCH; /* okay, compare the actual string contents */ @@ -964,7 +964,7 @@ citerdissect(struct vars * v, * sub-match endpoints in endpts[1..max_matches]. 
*/ max_matches = end - begin; - if (max_matches > t->max && t->max != INFINITY) + if (max_matches > t->max && t->max != DUPINF) max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; @@ -1149,7 +1149,7 @@ creviterdissect(struct vars * v, * sub-match endpoints in endpts[1..max_matches]. */ max_matches = end - begin; - if (max_matches > t->max && t->max != INFINITY) + if (max_matches > t->max && t->max != DUPINF) max_matches = t->max; if (max_matches < min_matches) max_matches = min_matches; diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 2f3be1aa0a..94e06f041b 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -78,9 +78,6 @@ #endif /* want size of a char in bits, and max value in bounded quantifiers */ -#ifndef CHAR_BIT -#include <limits.h> -#endif #ifndef _POSIX2_RE_DUP_MAX #define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #endif @@ -95,7 +92,7 @@ #define xxx 1 #define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) +#define DUPINF (DUPMAX+1) #define REMAGIC 0xfed7 /* magic number for main struct */ @@ -419,15 +416,15 @@ struct subre #define LONGER 01 /* prefers longer match */ #define SHORTER 02 /* prefers shorter match */ #define MIXED 04 /* mixed preference below */ -#define CAP 010 /* capturing parens below */ +#define CAP 010 /* capturing parens below */ #define BACKR 020 /* back reference below */ #define INUSE 0100 /* in use in final tree */ -#define LOCAL 03 /* bits which may not propagate up */ +#define NOPROP 03 /* bits which may not propagate up */ #define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ #define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) +#define UP(f) (((f)&~NOPROP) | (LMIX(f) & SMIX(f) & MIXED)) #define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -#define PREF(f) ((f)&NOPROP) +#define PREF(f) ((f)&NOPROP) #define PREF2(f1, f2) ((PREF(f1) != 0) ? 
PREF(f1) : PREF(f2)) #define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) short id; /* ID of subre (1..ntree-1) */ -- 2.40.0