* In this implementation, the NFA defines a necessary but not sufficient
* condition for a string to match the regex: that is, there can be strings
* that match the NFA but don't match the full regex, but not vice versa.
- * Thus, for example, it is okay for the functions below to ignore lookaround
- * constraints, which merely constrain the string some more.
+ * Thus, for example, it is okay for the functions below to treat lookaround
+ * constraints as no-ops, since they merely constrain the string some more.
*
* Notice that these functions return info into caller-provided arrays
* rather than doing their own malloc's. This simplifies the APIs by
#include "regex/regexport.h"
+#include "miscadmin.h"
+
static void scancolormap(struct colormap * cm, int co,
union tree * t, int level, chr partial,
pg_wchar **chars, int *chars_len);
}
/*
- * Get number of outgoing NFA arcs of state number "st".
+ * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
+ * arcs from the caller, treating any LACON as being automatically satisfied.
+ * Since the output representation does not support arcs that consume no
+ * character when traversed, we have to recursively traverse LACON arcs here,
+ * and report whatever normal arcs are reachable by traversing LACON arcs.
+ * Note that this wouldn't work if it were possible to reach the final state
+ * via LACON traversal, but the regex library never builds NFAs that have
+ * LACON arcs leading directly to the final state. (This is because the
+ * regex executor is designed to consume one character beyond the nominal
+ * match end --- possibly an EOS indicator --- so there is always a set of
+ * ordinary arcs leading to the final state.)
*
- * Note: LACON arcs are ignored, both here and in pg_reg_getoutarcs().
+ * traverse_lacons is a recursive subroutine used by both exported functions
+ * to count and then emit the reachable regular arcs. *arcs_count is
+ * incremented by the number of reachable arcs, and as many as will fit in
+ * arcs_len (possibly 0) are emitted into arcs[].
+ */
+static void
+traverse_lacons(struct cnfa * cnfa, int st,
+ int *arcs_count,
+ regex_arc_t *arcs, int arcs_len)
+{
+ struct carc *ca;
+
+ /*
+ * Since this function recurses, it could theoretically be driven to stack
+ * overflow. In practice, this is mostly useful to backstop against a
+ * failure of the regex compiler to remove a loop of LACON arcs.
+ */
+ check_stack_depth();
+
+ for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
+ {
+ if (ca->co < cnfa->ncolors)
+ {
+ /* Ordinary arc, so count and possibly emit it */
+ int ndx = (*arcs_count)++;
+
+ if (ndx < arcs_len)
+ {
+ arcs[ndx].co = ca->co;
+ arcs[ndx].to = ca->to;
+ }
+ }
+ else
+ {
+ /* LACON arc --- assume it's satisfied and recurse... */
+ /* ... but first, assert it doesn't lead directly to post state */
+ Assert(ca->to != cnfa->post);
+
+ traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
+ }
+ }
+}
+
+/*
+ * Get number of outgoing NFA arcs of state number "st".
*/
int
pg_reg_getnumoutarcs(const regex_t *regex, int st)
{
struct cnfa *cnfa;
- struct carc *ca;
- int count;
+ int arcs_count;
assert(regex != NULL && regex->re_magic == REMAGIC);
cnfa = &((struct guts *) regex->re_guts)->search;
if (st < 0 || st >= cnfa->nstates)
return 0;
- count = 0;
- for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
- {
- if (ca->co < cnfa->ncolors)
- count++;
- }
- return count;
+ arcs_count = 0;
+ traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
+ return arcs_count;
}
/*
regex_arc_t *arcs, int arcs_len)
{
struct cnfa *cnfa;
- struct carc *ca;
+ int arcs_count;
assert(regex != NULL && regex->re_magic == REMAGIC);
cnfa = &((struct guts *) regex->re_guts)->search;
if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
return;
- for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
- {
- if (ca->co < cnfa->ncolors)
- {
- arcs->co = ca->co;
- arcs->to = ca->to;
- arcs++;
- if (--arcs_len == 0)
- break;
- }
- }
+ arcs_count = 0;
+ traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
}
/*