From 6a7153661d66a00a03ff117c24fa49480b0699c8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 16 Oct 2015 15:11:49 -0400 Subject: [PATCH] Improve performance of pullback/pushfwd in regular-expression compiler. The previous coding would create a new intermediate state every time it wanted to interchange the ordering of two constraint arcs. Certain regex features such as \Y can generate large numbers of parallel constraint arcs, and if we needed to reorder the results of that, we created unreasonable numbers of intermediate states. To improve matters, keep a list of already-created intermediate states associated with the state currently being considered by the outer loop; we can re-use such states to place all the new arcs leading to the same destination or source. I also took the trouble to redefine push() and pull() to have a less risky API: they no longer delete any state or arc that the caller might possibly have a pointer to, except for the specifically-passed constraint arc. This reduces the risk of re-introducing the same type of error seen in the failed patch for CVE-2007-4772. Back-patch to all supported branches. --- src/backend/regex/regc_nfa.c | 150 +++++++++++++++++++++++++++-------- src/backend/regex/regcomp.c | 4 +- 2 files changed, 119 insertions(+), 35 deletions(-) diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 4c03ff1403..ecc91a8ede 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -1513,6 +1513,7 @@ pullback(struct nfa * nfa, struct state *nexts; struct arc *a; struct arc *nexta; + struct state *intermediates; int progress; /* find and pull until there are no more */ @@ -1522,14 +1523,25 @@ pullback(struct nfa * nfa, for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; + intermediates = NULL; for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == '^' || a->type == BEHIND) - if (pull(nfa, a)) + if (pull(nfa, a, &intermediates)) progress = 1; - assert(nexta == NULL || s->no != FREESTATE); } + /* clear tmp fields of intermediate states created here */ + while (intermediates != NULL) + { + struct state *ns = intermediates->tmp; + + intermediates->tmp = NULL; + intermediates = ns; + } + /* if s is now useless, get rid of it */ + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } if (progress && f != NULL) dumpnfa(nfa, f); @@ -1557,13 +1569,26 @@ pullback(struct nfa * nfa, /* * pull - pull a back constraint backward past its source state - * A significant property of this function is that it deletes at most - * one state -- the constraint's from state -- and only if the constraint - * was that state's last outarc. + * + * Returns 1 if successful (which it always is unless the source is the + * start state or we have an internal error), 0 if nothing happened. + * + * A significant property of this function is that it deletes no pre-existing + * states, and no outarcs of the constraint's from state other than the given + * constraint arc. This makes the loops in pullback() safe, at the cost that + * we may leave useless states behind. Therefore, we leave it to pullback() + * to delete such states. + * + * If the from state has multiple back-constraint outarcs, and/or multiple + * compatible constraint inarcs, we only need to create one new intermediate + * state per combination of predecessor and successor states. *intermediates + * points to a list of such intermediate states for this from state (chained + * through their tmp fields). */ -static int /* 0 couldn't, 1 could */ +static int pull(struct nfa * nfa, - struct arc * con) + struct arc * con, + struct state ** intermediates) { struct state *from = con->from; struct state *to = con->to; @@ -1580,7 +1605,11 @@ pull(struct nfa * nfa, return 1; } - /* first, clone from state if necessary to avoid other outarcs */ + /* + * First, clone from state if necessary to avoid other outarcs. This may + * seem wasteful, but it simplifies the logic, and we'll get rid of the + * clone state again at the bottom. + */ if (from->nouts > 1) { s = newstate(nfa); @@ -1589,13 +1618,15 @@ pull(struct nfa * nfa, copyins(nfa, from, s, 1); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); + if (NISERR()) + return 0; from = s; con = from->outs; } assert(from->nouts == 1); /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) + for (a = from->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; switch (combine(con, a)) @@ -1606,13 +1637,23 @@ pull(struct nfa * nfa, case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ + /* need an intermediate state, but might have one already */ + for (s = *intermediates; s != NULL; s = s->tmp) + { + assert(s->nins > 0 && s->nouts > 0); + if (s->ins->from == a->from && s->outs->to == to) + break; + } + if (s == NULL) + { + s = newstate(nfa); + if (NISERR()) + return 0; + s->tmp = *intermediates; + *intermediates = s; + } cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; + cparc(nfa, a, s, to); freearc(nfa, a); break; default: @@ -1623,7 +1664,8 @@ pull(struct nfa * nfa, /* remaining inarcs, if any, incorporate the constraint */ moveins(nfa, from, to); - dropstate(nfa, from); /* will free the constraint */ + freearc(nfa, con); + /* from state is now useless, but we leave it to pullback() to clean up */ return 1; } @@ -1638,6 +1680,7 @@ pushfwd(struct nfa * nfa, struct state *nexts; struct arc *a; struct arc *nexta; + struct state *intermediates; int progress; /* find and push until there are no more */ @@ -1647,14 +1690,25 @@ pushfwd(struct nfa * nfa, for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; + intermediates = NULL; for (a = s->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; if (a->type == '$' || a->type == AHEAD) - if (push(nfa, a)) + if (push(nfa, a, &intermediates)) progress = 1; - assert(nexta == NULL || s->no != FREESTATE); } + /* clear tmp fields of intermediate states created here */ + while (intermediates != NULL) + { + struct state *ns = intermediates->tmp; + + intermediates->tmp = NULL; + intermediates = ns; + } + /* if s is now useless, get rid of it */ + if ((s->nins == 0 || s->nouts == 0) && !s->flag) + dropstate(nfa, s); } if (progress && f != NULL) dumpnfa(nfa, f); @@ -1682,13 +1736,26 @@ pushfwd(struct nfa * nfa, /* * push - push a forward constraint forward past its destination state - * A significant property of this function is that it deletes at most - * one state -- the constraint's to state -- and only if the constraint - * was that state's last inarc. + * + * Returns 1 if successful (which it always is unless the destination is the + * post state or we have an internal error), 0 if nothing happened. + * + * A significant property of this function is that it deletes no pre-existing + * states, and no inarcs of the constraint's to state other than the given + * constraint arc. This makes the loops in pushfwd() safe, at the cost that + * we may leave useless states behind. Therefore, we leave it to pushfwd() + * to delete such states. + * + * If the to state has multiple forward-constraint inarcs, and/or multiple + * compatible constraint outarcs, we only need to create one new intermediate + * state per combination of predecessor and successor states. *intermediates + * points to a list of such intermediate states for this to state (chained + * through their tmp fields). */ -static int /* 0 couldn't, 1 could */ +static int push(struct nfa * nfa, - struct arc * con) + struct arc * con, + struct state ** intermediates) { struct state *from = con->from; struct state *to = con->to; @@ -1705,22 +1772,28 @@ push(struct nfa * nfa, return 1; } - /* first, clone to state if necessary to avoid other inarcs */ + /* + * First, clone to state if necessary to avoid other inarcs. This may + * seem wasteful, but it simplifies the logic, and we'll get rid of the + * clone state again at the bottom. + */ if (to->nins > 1) { s = newstate(nfa); if (NISERR()) return 0; copyouts(nfa, to, s, 1); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ + cparc(nfa, con, from, s); /* move constraint arc */ freearc(nfa, con); + if (NISERR()) + return 0; to = s; con = to->ins; } assert(to->nins == 1); /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) + for (a = to->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; switch (combine(con, a)) @@ -1731,13 +1804,23 @@ push(struct nfa * nfa, case SATISFIED: /* no action needed */ break; case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ + /* need an intermediate state, but might have one already */ + for (s = *intermediates; s != NULL; s = s->tmp) + { + assert(s->nins > 0 && s->nouts > 0); + if (s->ins->from == from && s->outs->to == a->to) + break; + } + if (s == NULL) + { + s = newstate(nfa); + if (NISERR()) + return 0; + s->tmp = *intermediates; + *intermediates = s; + } + cparc(nfa, con, s, a->to); cparc(nfa, a, from, s); - if (NISERR()) - return 0; freearc(nfa, a); break; default: @@ -1748,7 +1831,8 @@ push(struct nfa * nfa, /* remaining outarcs, if any, incorporate the constraint */ moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ + freearc(nfa, con); + /* to state is now useless, but we leave it to pushfwd() to clean up */ return 1; } diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 91eb77124c..6b031075e7 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -149,9 +149,9 @@ static void cleartraverse(struct nfa *, struct state *); static void specialcolors(struct nfa *); static long optimize(struct nfa *, FILE *); static void pullback(struct nfa *, FILE *); -static int pull(struct nfa *, struct arc *); +static int pull(struct nfa *, struct arc *, struct state **); static void pushfwd(struct nfa *, FILE *); -static int push(struct nfa *, struct arc *); +static int push(struct nfa *, struct arc *, struct state **); #define INCOMPATIBLE 1 /* destroys arc */ #define SATISFIED 2 /* constraint satisfied */ -- 2.40.0