+#include <queue>
+
#include "src/dfa/determinization.h"
#include "src/nfa/nfa.h"
-#include "src/util/debug_assert.h"
namespace re2c
{
-static nfa_state_t *explore(determ_context_t &, nfa_state_t *);
+struct cmp_gtop_t // ordering functor for the std::priority_queue of states in closure_posix_gtop
+{
+ bool operator() (const nfa_state_t *, const nfa_state_t *) const;
+};
+
+static void closure_posix_gor1(determ_context_t &);
+static void closure_posix_gtop(determ_context_t &);
+static nfa_state_t *next_admissible_arc(determ_context_t &, nfa_state_t *);
static nfa_state_t *relax(determ_context_t &, clos_t);
+static void cleanup(closure_t &);
+
void closure_posix(determ_context_t &ctx)
{
- const closure_t &init = ctx.dc_reached;
+ closure_posix_gor1(ctx);
+}
+
+/*
+ * note [GOR1 SSSP algorithm]
+ * Cherkassky-Goldberg-Radzik Single Source Shortest Path algorithm.
+ *
+ * Papers:
+ * - "A heuristic improvement of the Bellman-Ford algorithm"
+ * by Goldberg, Radzik (1993)
+ * - "Shortest paths algorithms: Theory and experimental evaluation"
+ * by Cherkassky, Goldberg, Radzik (1996)
+ *
+ * Complexity for digraph G = (V, E) is O(|V| * |E|), and O(|V| + |E|)
+ * in case of acyclic graph.
+ */
+void closure_posix_gor1(determ_context_t &ctx)
+{
+ closure_t &init = ctx.dc_reached;
closure_t &done = ctx.dc_closure;
std::stack<nfa_state_t*>
&topsort = ctx.dc_stack_topsort,
}
}
- // Gordberg-Radzik 'shortest path' algorithm.
- // Papers: 1993, "A heuristic improvement of the Bellman-Ford
- // algorithm" by Goldberg, Radzik and 1996, Shortest paths algorithms:
- // Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik.
- // Complexity for digraph G=(V,E) is O(|V|*|E|).
for (; !topsort.empty(); ) {
// 1st pass: scan admissible subgraph reachable from B-stack
if (q->status != GOR_LINEAR) {
q->status = GOR_TOPSORT;
- // find next admissible transition
- while ((p = explore(ctx, q))
+ while ((p = next_admissible_arc(ctx, q))
&& p->status != GOR_NOPASS) {
p->active = 1;
}
- // follow the admissible transition
if (p) {
topsort.push(q);
topsort.push(p);
p->arcidx = 0;
}
- // done with this state: all deps visited
else {
q->status = GOR_LINEAR;
linear.push(q);
linear.pop();
if (q->active) {
- // scan admissible transitions
q->arcidx = 0;
- while ((p = explore(ctx, q))) {
+ while ((p = next_admissible_arc(ctx, q))) {
if (p->status == GOR_NOPASS) {
topsort.push(p);
p->arcidx = 0;
}
}
- // clean up (do this before removing any states from closure)
- for (clositer_t i = done.begin(); i != done.end(); ++i) {
- q = i->state;
- q->clos = NOCLOS;
+ cleanup(done);
+}
+
+/*
+ * note [GTOP SSSP algorithm]
+ * Global Topsort Single Source Shortest Path algorithm.
+ *
+ * It is well known that SSSP can be solved in linear time on DAGs (directed
+ * acyclic graphs) by exploring graph nodes in topological order. In our case
+ * TNFA is not a DAG (it may have cycles), but it is possible to compute fake
+ * topological order by ignoring back edges.
+ *
+ * The algorithm works by having a priority queue of nodes, where priorities
+ * are indices of nodes in fake topological ordering. At each step, the node
+ * with the minimal priority is popped from queue and explored. All nodes
+ * reachable from it on admissible arcs are enqueued, unless they are already
+ * on queue.
+ *
+ * The resulting algorithm is of course not optimal: it can get stuck on
+ * graphs with loops, because it will give priority to some of the loop nodes
+ * compared to others for no good reason.
+ *
+ * However the algorithm is simple and optimal for DAGs, therefore we keep it.
+ */
+void closure_posix_gtop(determ_context_t &ctx)
+{
+ const closure_t &init = ctx.dc_reached;
+ closure_t &done = ctx.dc_closure;
+
+ std::priority_queue<nfa_state_t*, std::vector<nfa_state_t*>
+ , cmp_gtop_t> todo; // top() is the queued state that cmp_gtop_t orders last, i.e. the one with the greatest topord
+
+ done.clear();
+
+ for (cclositer_t c = init.begin(); c != init.end(); ++c) { // seed the queue from the initial closure items
+ nfa_state_t *q = relax(ctx, *c); // NOTE(review): relax() is only partly visible here; a null result is simply skipped
+ if (q && q->active == 0) {
+ todo.push(q);
+ q->active = 1; // 'active' flags a state that is currently on the queue
+ }
+ }
+
+ for (; !todo.empty(); ) {
+ nfa_state_t *q = todo.top();
+ todo.pop();
+ q->active = 0; // off the queue now, so it may be enqueued again later
q->arcidx = 0;
- DASSERT(q->status == GOR_NOPASS && q->active == 0);
+
+ while (true) {
+ nfa_state_t *p = next_admissible_arc(ctx, q);
+ if (!p) break; // null return ends the scan of q's arcs
+ if (!p->active) { // never hold the same state on the queue twice
+ todo.push(p);
+ p->active = 1;
+ }
+ }
}
+
+ cleanup(done);
}
+inline bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const
+{
+ return x->topord < y->topord; // strict "less" on topord: std::priority_queue::top() then yields the greatest topord
+}
-nfa_state_t *explore(determ_context_t &ctx, nfa_state_t *q)
+nfa_state_t *next_admissible_arc(determ_context_t &ctx, nfa_state_t *q)
{
// find the next admissible transition, adjust the index
// of the next transition and return the to-state
return p;
}
-
nfa_state_t *relax(determ_context_t &ctx, clos_t x)
{
closure_t &done = ctx.dc_closure;
return q;
}
+/*
+ * Reset the per-state scratch fields of every state referenced from
+ * `closure`: `clos` is set back to NOCLOS and `arcidx` back to zero.
+ * In debug builds it is also checked that each state has already
+ * returned to status GOR_NOPASS with the `active` flag cleared.
+ */
+void cleanup(closure_t &closure)
+{
+    clositer_t item = closure.begin();
+    while (item != closure.end()) {
+        nfa_state_t *state = item->state;
+        state->arcidx = 0;
+        state->clos = NOCLOS;
+        DASSERT(state->status == GOR_NOPASS && state->active == 0);
+        ++item;
+    }
+}
void orders(determ_context_t &ctx)
{
namespace re2c {
+static void calc_indegrees(nfa_state_t *);
+static void calc_topord(nfa_state_t *, uint32_t &);
+
+
/*
* note [counted repetition and iteration expansion]
*
return s;
}
-void calc_indegrees(nfa_state_t *n)
-{
- ++n->indeg;
- if (n->indeg > 1) return;
-
- switch (n->type) {
- case nfa_state_t::NIL:
- calc_indegrees(n->nil.out);
- break;
- case nfa_state_t::ALT:
- calc_indegrees(n->alt.out1);
- calc_indegrees(n->alt.out2);
- break;
- case nfa_state_t::TAG:
- calc_indegrees(n->tag.out);
- break;
- case nfa_state_t::RAN:
- calc_indegrees(n->ran.out);
- case nfa_state_t::FIN:
- break;
- }
-}
-
nfa_t::nfa_t(const RESpec &spec)
: max_size(estimate_size(spec.res))
, size(0)
}
}
- calc_indegrees(root);
+ if (spec.opts->posix_captures) {
+ // needed for closure algorithms GOR1 and GTOP
+ uint32_t topord = 0;
+ calc_topord(root, topord);
+ calc_indegrees(root);
+ }
}
nfa_t::~nfa_t()
delete[] states;
}
+/*
+ * Count incoming arcs.  `indeg` of a state is incremented every time the
+ * walk arrives at it (including the initial call on the start state).
+ * Successors are followed only when the incremented counter is exactly
+ * one, i.e. on the first arrival assuming `indeg` starts at zero, so
+ * cycles do not cause endless traversal.
+ */
+void calc_indegrees(nfa_state_t *n)
+{
+    for (;;) {
+        if (++n->indeg > 1) return;
+
+        // states with one successor are followed iteratively; only the
+        // first branch of an alternative needs a recursive call
+        switch (n->type) {
+            case nfa_state_t::NIL:
+                n = n->nil.out;
+                continue;
+            case nfa_state_t::ALT:
+                calc_indegrees(n->alt.out1);
+                n = n->alt.out2;
+                continue;
+            case nfa_state_t::TAG:
+                n = n->tag.out;
+                continue;
+            case nfa_state_t::RAN:
+                n = n->ran.out;
+                continue;
+            case nfa_state_t::FIN:
+                return;
+        }
+
+        // no case matched: nothing to follow
+        return;
+    }
+}
+
+
+/*
+ * Assign fake topological indices to the states reachable from `n`.
+ *
+ * The walk is depth-first and a state is numbered only after all of its
+ * successors have been handled (post-order).  Arcs leading to a state that
+ * is still being processed (back edges) or that is already numbered are
+ * ignored.
+ *
+ * `topord` is the running counter shared by the whole walk; on return it
+ * equals the number of states numbered so far.  A zero `topord` field
+ * means "not visited yet" (this relies on the field being zero in fresh
+ * states, which is set up outside this function), so indices handed out
+ * here start at 1.  Handing out index 0 would make the first finished
+ * state look unvisited: it would be expanded and renumbered on every
+ * later arrival, ending up with a larger index than some of its
+ * predecessors.
+ */
+void calc_topord(nfa_state_t *n, uint32_t &topord)
+{
+    if (n->topord != 0) return;
+    n->topord = ~0u; // temporary "visited" marker
+
+    switch (n->type) {
+        case nfa_state_t::NIL:
+            calc_topord(n->nil.out, topord);
+            break;
+        case nfa_state_t::ALT:
+            calc_topord(n->alt.out1, topord);
+            calc_topord(n->alt.out2, topord);
+            break;
+        case nfa_state_t::TAG:
+            calc_topord(n->tag.out, topord);
+            break;
+        case nfa_state_t::RAN:
+            calc_topord(n->ran.out, topord);
+            break;
+        case nfa_state_t::FIN:
+            break;
+    }
+
+    // pre-increment keeps every real index non-zero, so it can never be
+    // confused with the "not visited yet" value tested above
+    n->topord = ++topord;
+}
+
} // namespace re2c