/*
* longest - longest-preferred matching engine
+ *
+ * On success, returns match endpoint address. Returns NULL on no match.
+ * Internal errors also return NULL, with v->err set.
*/
-static chr * /* endpoint, or NULL */
-longest(struct vars * v, /* used only for debug and exec flags */
+static chr *
+longest(struct vars * v,
struct dfa * d,
chr *start, /* where the match should start */
chr *stop, /* match must end at or before here */
int i;
struct colormap *cm = d->cm;
+ /* prevent "uninitialized variable" warnings */
+ if (hitstopp != NULL)
+ *hitstopp = 0;
+
/* initialize */
css = initialize(v, d, start);
+ if (css == NULL)
+ return NULL;
cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
/* startup */
FDEBUG(("+++ startup +++\n"));
return NULL;
css->lastseen = cp;
- /* main loop */
+ /*
+ * This is the main text-scanning loop. It seems worth having two copies
+ * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+ * builds, when you're not actively tracing.
+ */
+#ifdef REG_DEBUG
if (v->eflags & REG_FTRACE)
+ {
while (cp < realstop)
{
FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
ss->lastseen = cp;
css = ss;
}
+ }
else
+#endif
+ {
while (cp < realstop)
{
co = GETCOLOR(cm, *cp);
ss->lastseen = cp;
css = ss;
}
+ }
+
+ if (ISERR())
+ return NULL;
/* shutdown */
FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
FDEBUG(("color %ld\n", (long) co));
ss = miss(v, d, css, co, cp, start);
+ if (ISERR())
+ return NULL;
/* special case: match ended at eol? */
if (ss != NULL && (ss->flags & POSTSTATE))
return cp;
/*
* shortest - shortest-preferred matching engine
+ *
+ * On success, returns match endpoint address. Returns NULL on no match.
+ * Internal errors also return NULL, with v->err set.
*/
-static chr * /* endpoint, or NULL */
+static chr *
shortest(struct vars * v,
struct dfa * d,
chr *start, /* where the match should start */
chr *min, /* match must end at or after here */
chr *max, /* match must end at or before here */
- chr **coldp, /* store coldstart pointer here, if nonNULL */
+ chr **coldp, /* store coldstart pointer here, if non-NULL */
int *hitstopp) /* record whether hit v->stop, if non-NULL */
{
chr *cp;
struct sset *ss;
struct colormap *cm = d->cm;
+ /* prevent "uninitialized variable" warnings */
+ if (coldp != NULL)
+ *coldp = NULL;
+ if (hitstopp != NULL)
+ *hitstopp = 0;
+
/* initialize */
css = initialize(v, d, start);
+ if (css == NULL)
+ return NULL;
cp = start;
- if (hitstopp != NULL)
- *hitstopp = 0;
/* startup */
FDEBUG(("--- startup ---\n"));
css->lastseen = cp;
ss = css;
- /* main loop */
+ /*
+ * This is the main text-scanning loop. It seems worth having two copies
+ * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+ * builds, when you're not actively tracing.
+ */
+#ifdef REG_DEBUG
if (v->eflags & REG_FTRACE)
+ {
while (cp < realmax)
{
FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
if ((ss->flags & POSTSTATE) && cp >= realmin)
break; /* NOTE BREAK OUT */
}
+ }
else
+#endif
+ {
while (cp < realmax)
{
co = GETCOLOR(cm, *cp);
if ((ss->flags & POSTSTATE) && cp >= realmin)
break; /* NOTE BREAK OUT */
}
+ }
if (ss == NULL)
return NULL;
* initialize - hand-craft a cache entry for startup, otherwise get ready
*/
static struct sset *
-initialize(struct vars * v, /* used only for debug flags */
+initialize(struct vars * v,
struct dfa * d,
chr *start)
{
else
{ /* no, must (re)build it */
ss = getvacant(v, d, start, start);
+ if (ss == NULL)
+ return NULL;
for (i = 0; i < d->wordsper; i++)
ss->states[i] = 0;
BSET(ss->states, d->cnfa->pre);
}
/*
- * miss - handle a cache miss
+ * miss - handle a stateset cache miss
+ *
+ * css is the current stateset, co is the color of the current input character,
+ * cp points to the character after that (which is where we may need to test
+ * LACONs). start does not affect matching behavior but is needed for pickss'
+ * heuristics about which stateset cache entry to replace.
+ *
+ * Ordinarily, returns the address of the next stateset (the one that is
+ * valid after consuming the input character). Returns NULL if no valid
+ * NFA states remain, ie we have a certain match failure.
+ * Internal errors also return NULL, with v->err set.
*/
-static struct sset * /* NULL if goes to empty set */
-miss(struct vars * v, /* used only for debug flags */
+static struct sset *
+miss(struct vars * v,
struct dfa * d,
struct sset * css,
pcolor co,
}
FDEBUG(("miss\n"));
- /* first, what set of states would we end up in? */
+ /*
+ * Checking for operation cancel in the inner text search loop seems
+ * unduly expensive. As a compromise, check during cache misses.
+ */
+ if (CANCEL_REQUESTED(v->re))
+ {
+ ERR(REG_CANCEL);
+ return NULL;
+ }
+
+ /*
+ * What set of states would we end up in after consuming the co character?
+ * We first consider PLAIN arcs that consume the character, and then look
+ * to see what LACON arcs could be traversed after consuming it.
+ */
for (i = 0; i < d->wordsper; i++)
- d->work[i] = 0;
+ d->work[i] = 0; /* build new stateset bitmap in d->work */
ispost = 0;
noprogress = 1;
gotstate = 0;
noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
- dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0;
+ if (!gotstate)
+ return NULL; /* character cannot reach any new state */
+ dolacons = (cnfa->flags & HASLACONS);
sawlacons = 0;
+ /* outer loop handles transitive closure of reachable-by-LACON states */
while (dolacons)
- { /* transitive closure */
+ {
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
{
if (ca->co < cnfa->ncolors)
- continue; /* NOTE CONTINUE */
- sawlacons = 1;
+ continue; /* not a LACON arc */
if (ISBSET(d->work, ca->to))
- continue; /* NOTE CONTINUE */
+ continue; /* arc would be a no-op anyway */
+ sawlacons = 1; /* this LACON affects our result */
if (!lacon(v, cnfa, cp, ca->co))
- continue; /* NOTE CONTINUE */
+ {
+ if (ISERR())
+ return NULL;
+ continue; /* LACON arc cannot be traversed */
+ }
+ if (ISERR())
+ return NULL;
BSET(d->work, ca->to);
dolacons = 1;
if (ca->to == cnfa->post)
FDEBUG(("%d :> %d\n", i, ca->to));
}
}
- if (!gotstate)
- return NULL;
h = HASH(d->work, d->wordsper);
- /* next, is that in the cache? */
+ /* Is this stateset already in the cache? */
for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
if (HIT(h, d->work, p, d->wordsper))
{
if (i == 0)
{ /* nope, need a new cache entry */
p = getvacant(v, d, cp, start);
+ if (p == NULL)
+ return NULL;
assert(p != css);
for (i = 0; i < d->wordsper; i++)
p->states[i] = d->work[i];
/* lastseen to be dealt with by caller */
}
+ /*
+ * Link new stateset to old, unless a LACON affected the result, in which
+ * case we don't create the link. That forces future transitions across
+ * this same arc (same prior stateset and character color) to come through
+ * miss() again, so that we can recheck the LACON(s), which might or might
+ * not pass since context will be different.
+ */
if (!sawlacons)
- { /* lookahead conds. always cache miss */
+ {
FDEBUG(("c%d[%d]->c%d\n",
(int) (css - d->ssets), co, (int) (p - d->ssets)));
css->outs[co] = p;
/*
* getvacant - get a vacant state set
+ *
* This routine clears out the inarcs and outarcs, but does not otherwise
* clear the innards of the state set -- that's up to the caller.
*/
static struct sset *
-getvacant(struct vars * v, /* used only for debug flags */
+getvacant(struct vars * v,
struct dfa * d,
chr *cp,
chr *start)
color co;
ss = pickss(v, d, cp, start);
+ if (ss == NULL)
+ return NULL;
assert(!(ss->flags & LOCKED));
/* clear out its inarcs, including self-referential ones */
* pickss - pick the next stateset to be used
*/
static struct sset *
-pickss(struct vars * v, /* used only for debug flags */
+pickss(struct vars * v,
struct dfa * d,
chr *cp,
chr *start)
/* nobody's old enough?!? -- something's really wrong */
FDEBUG(("cannot find victim to replace!\n"));
- assert(NOTREACHED);
ERR(REG_ASSERT);
- return d->ssets;
+ return NULL;
}
{
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
+ if (ISERR())
+ {
+ *coldp = cold;
+ return v->err;
+ }
if (close == NULL)
break; /* NOTE BREAK */
assert(cold != NULL);
else
end = longest(v, d, begin, estop,
&hitend);
+ if (ISERR())
+ {
+ *coldp = cold;
+ return v->err;
+ }
if (hitend && cold == NULL)
cold = begin;
if (end == NULL)
mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL)
{
+ ERR(REG_ASSERT); /* if no other error reported already */
freedfa(d);
freedfa(d2);
- return REG_ASSERT;
+ return v->err;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
while (longest(v, d2, mid, end, (int *) NULL) != end)
{
+ if (ISERR())
+ {
+ freedfa(d);
+ freedfa(d2);
+ return v->err;
+ }
/* that midpoint didn't work, find a new one */
if (mid == stop)
{
{
/* failed to find a new one! */
MDEBUG(("failed midpoint!\n"));
+ ERR(REG_ASSERT); /* if no other error reported already */
freedfa(d);
freedfa(d2);
- return REG_ASSERT;
+ return v->err;
}
MDEBUG(("new midpoint %ld\n", LOFF(mid)));
}
MDEBUG(("trying %dth\n", i));
assert(t->left != NULL && t->left->cnfa.nstates > 0);
d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
- if (ISERR())
- return v->err;
+ NOERR();
if (longest(v, d, begin, end, (int *) NULL) == end)
{
MDEBUG(("success\n"));
return dissect(v, t->left, begin, end);
}
freedfa(d);
+ NOERR();
}
return REG_ASSERT; /* none of them matched?!? */
}
{
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
}
/* that midpoint didn't work, find a new one */
- if (mid == begin)
+ if (ISERR() || mid == begin)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->retry));
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
mid = longest(v, d, begin, mid - 1, (int *) NULL);
MDEBUG(("%d failed midpoint\n", t->retry));
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
{
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
}
/* that midpoint didn't work, find a new one */
- if (mid == end)
+ if (ISERR() || mid == end)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->retry));
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
MDEBUG(("%d failed midpoint\n", t->retry));
freedfa(d);
freedfa(d2);
+ NOERR();
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
if (longest(v, d, begin, end, (int *) NULL) != end)
{
freedfa(d);
+ NOERR();
v->mem[t->retry] = TRIED;
return caltdissect(v, t->right, begin, end);
}