From 03e6423fc1eb45af18d49fee741a5536873ff626 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 1 Mar 2014 15:21:04 -0500 Subject: [PATCH] Allow regex operations to be terminated early by query cancel requests. The regex code didn't have any provision for query cancel; which is unsurprising given its non-Postgres origin, but still problematic since some operations can take a long time. Introduce a callback function to check for a pending query cancel or session termination request, and call it in a couple of strategic spots where we can make the regex code exit with an error indicator. If we ever actually split out the regex code as a standalone library, some additional work will be needed to let the cancel callback function be specified externally to the library. But that's straightforward (certainly so by comparison to putting the locale-dependent character classification logic on a similar arms-length basis), and there seems no need to do it right now. A bigger issue is that there may be more places than these two where we need to check for cancels. We can always add more checks later, now that the infrastructure is in place. Since there are known examples of not-terribly-long regexes that can lock up a backend for a long time, back-patch to all supported branches. I have hopes of fixing the known performance problems later, but adding query cancel ability seems like a good idea even if they were all fixed. --- src/backend/regex/regc_nfa.c | 12 ++++++++++++ src/backend/regex/regcomp.c | 20 ++++++++++++++++++++ src/backend/regex/regexec.c | 4 ++++ src/backend/utils/adt/regexp.c | 12 ++++++++++++ src/backend/utils/adt/varlena.c | 1 + src/include/regex/regerrs.h | 4 ++++ src/include/regex/regex.h | 1 + src/include/regex/regguts.h | 3 +++ 8 files changed, 57 insertions(+) diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 05fe8b0808..170da5954c 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -174,11 +174,23 @@ newstate(struct nfa * nfa) { struct state *s; + /* + * This is a handy place to check for operation cancel during regex + * compilation, since no code path will go very long without making a new + * state. + */ + if (CANCEL_REQUESTED(nfa->v->re)) + { + NERR(REG_CANCEL); + return NULL; + } + if (TooManyStates(nfa)) { NERR(REG_ETOOBIG); return NULL; } + if (nfa->free != NULL) { s = nfa->free; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index b5988a2fbc..bd2bb60f5b 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -34,6 +34,8 @@ #include "regex/regguts.h" +#include "miscadmin.h" /* needed by rcancelrequested() */ + /* * forward declarations, up here so forward datatypes etc. are defined early */ @@ -67,6 +69,7 @@ static long nfanode(struct vars *, struct subre *, FILE *); static int newlacon(struct vars *, struct state *, struct state *, int); static void freelacons(struct subre *, int); static void rfree(regex_t *); +static int rcancelrequested(void); #ifdef REG_DEBUG static void dump(regex_t *, FILE *); @@ -276,6 +279,7 @@ struct vars /* static function list */ static struct fns functions = { rfree, /* regfree insides */ + rcancelrequested /* check for cancel request */ }; @@ -1893,6 +1897,22 @@ rfree(regex_t *re) } } +/* + * rcancelrequested - check for external request to cancel regex operation + * + * Return nonzero to fail the operation with error code REG_CANCEL, + * zero to keep going + * + * The current implementation is Postgres-specific. If we ever get around + * to splitting the regex code out as a standalone library, there will need + * to be some API to let applications define a callback function for this. + */ +static int +rcancelrequested(void) +{ + return InterruptPending && (QueryCancelPending || ProcDiePending); +} + #ifdef REG_DEBUG /* diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 78ebdee39e..0edb83c109 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -595,6 +595,10 @@ cdissect(struct vars * v, assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); + /* handy place to check for operation cancel */ + if (CANCEL_REQUESTED(v->re)) + return REG_CANCEL; + switch (t->op) { case '=': /* terminal node */ diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 4e79b90467..b5a235ec9f 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -31,6 +31,7 @@ #include "catalog/pg_type.h" #include "funcapi.h" +#include "miscadmin.h" #include "regex/regex.h" #include "utils/array.h" #include "utils/builtins.h" @@ -188,6 +189,15 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) if (regcomp_result != REG_OKAY) { /* re didn't compile (no need for pg_regfree, if so) */ + + /* + * Here and in other places in this file, do CHECK_FOR_INTERRUPTS + * before reporting a regex error. This is so that if the regex + * library aborts and returns REG_CANCEL, we don't print an error + * message that implies the regex was invalid. + */ + CHECK_FOR_INTERRUPTS(); + pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), @@ -268,6 +278,7 @@ RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) { /* re failed??? */ + CHECK_FOR_INTERRUPTS(); pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), @@ -1216,6 +1227,7 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, default: /* re failed??? */ + CHECK_FOR_INTERRUPTS(); pg_regerror(re_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 09f4e10652..2d2e078798 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2913,6 +2913,7 @@ replace_text_regexp(text *src_text, void *regexp, { char errMsg[100]; + CHECK_FOR_INTERRUPTS(); pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h index f02711ee17..809b511266 100644 --- a/src/include/regex/regerrs.h +++ b/src/include/regex/regerrs.h @@ -81,3 +81,7 @@ { REG_ECOLORS, "REG_ECOLORS", "too many colors" }, + +{ + REG_CANCEL, "REG_CANCEL", "operation cancelled" +}, diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index 3e87dff17b..2c7fa4df46 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -154,6 +154,7 @@ typedef struct #define REG_BADOPT 18 /* invalid embedded option */ #define REG_ETOOBIG 19 /* nfa has too many states */ #define REG_ECOLORS 20 /* too many colors */ +#define REG_CANCEL 21 /* operation cancelled */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 4e04abfa6f..b5fe33c87f 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -446,8 +446,11 @@ struct subre struct fns { void FUNCPTR(free, (regex_t *)); + int FUNCPTR(cancel_requested, (void)); }; +#define CANCEL_REQUESTED(re) \ + ((*((struct fns *) (re)->re_fns)->cancel_requested) ()) /* -- 2.40.0