From 05d5c5959f815852f35b17d7ad7df048c30530e8 Mon Sep 17 00:00:00 2001
From: Ulya Trofimovich <skvadrik@gmail.com>
Date: Sat, 28 Jul 2018 23:30:04 +0100
Subject: [PATCH] Updated GOR1 (fixed the core algorithm to avoid useless
 re-scans of the same state).

Also, depth-first traversal was done in a slightly incorrect way:
we checked outgoing nodes for admissibility and pushed the corresponding
child states on stack all at once. This is not the same as checking
the first child and recursing into it, then checking the next child,
..., and so on (because we might discover the second child while exploring
the first, and admissiblitiy check for the second child *after* that
might yield false, while *before* exploring the first child it yielded
true).
---
 re2c/src/dfa/closure.cc   | 168 ++++++++++++++++++++++++++------------
 re2c/src/nfa/dump.cc      |   6 +-
 re2c/src/nfa/nfa.h        |  39 +++++----
 re2c/src/nfa/re_to_nfa.cc |  25 ++++++
 4 files changed, 164 insertions(+), 74 deletions(-)
diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc
index a097aafe..d0f21e14 100644
--- a/re2c/src/dfa/closure.cc
+++ b/re2c/src/dfa/closure.cc
@@ -97,109 +97,169 @@ static bool redundant(size_t t, const std::vector<Tag> &tags) {
  * Both disambiguation policies forbid epsilon-cycles with negative weight.
  */
 
-static void enqueue(clos_t x, std::stack<nfa_state_t*> &bstack, closure_t &done,
+static nfa_state_t *relax(clos_t x, closure_t &done,
 	closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
 {
-	nfa_state_t *n = x.state;
-	uint32_t &i = n->clos;
+	nfa_state_t *q = x.state;
+	uint32_t &i = q->clos;
 
+	// first time we see this state
 	if (i == NOCLOS) {
 		i = static_cast<uint32_t>(done.size());
 		done.push_back(x);
-	} else {
-		const int32_t cmp = compare_posix(x, done[i], tagpool, tags);
-		if (cmp < 0) std::swap(x, done[i]);
+	}
+	// States of in-degree less than 2 are not joint points;
+	// the fact that we are re-scanning this state means that we found
+	// a better path to some previous state. Due to the right distributivity
+	// of path comparison over path concatenation (X < Y => XZ < YZ) we
+	// can just propagate the new path up to the next join point.
+	else if (q->indeg < 2) {
+		std::swap(x, done[i]);
+		if (shadow) shadow->push_back(x);
+	}
+	// join point; compare the new path and the old path
+	else {
+		clos_t &y = done[i];
+		const int32_t cmp = compare_posix(x, y, tagpool, tags);
+		if (cmp < 0) std::swap(x, y);
 		if (shadow && cmp != 0) shadow->push_back(x);
-		if (cmp >= 0) return;
+		if (cmp >= 0) q = NULL;
 	}
 
-	if (n->status != GOR_TOPSORT) {
-		bstack.push(n);
-		n->status = GOR_NEWPASS;
-	}
+	return q;
 }
 
-static void scan(nfa_state_t *n, std::stack<nfa_state_t*> &bstack, closure_t &done,
+static nfa_state_t *explore(nfa_state_t *q, closure_t &done,
 	closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
 {
-	tagtree_t &history = tagpool.history;
-	clos_t x = done[n->clos];
-	switch (n->type) {
+	// find the next admissible transition, adjust the index
+	// of the next transition and return the to-state
+	nfa_state_t *p = NULL;
+	clos_t x = done[q->clos];
+	switch (q->type) {
 		case nfa_state_t::NIL:
-			x.state = n->nil.out;
-			enqueue(x, bstack, done, shadow, tagpool, tags);
+			if (q->arcidx == 0) {
+				x.state = q->nil.out;
+				p = relax(x, done, shadow, tagpool, tags);
+				++q->arcidx;
+			}
 			break;
 		case nfa_state_t::ALT:
-			x.state = n->alt.out2;
-			enqueue(x, bstack, done, shadow, tagpool, tags);
-			x.state = n->alt.out1;
-			enqueue(x, bstack, done, shadow, tagpool, tags);
+			if (q->arcidx == 0) {
+				x.state = q->alt.out1;
+				p = relax(x, done, shadow, tagpool, tags);
+				++q->arcidx;
+			}
+			if (q->arcidx == 1 && !p) {
+				x.state = q->alt.out2;
+				p = relax(x, done, shadow, tagpool, tags);
+				++q->arcidx;
+			}
 			break;
 		case nfa_state_t::TAG:
-			x.state = n->tag.out;
-			x.tlook = history.push(x.tlook, n->tag.info,
-				n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
-			enqueue(x, bstack, done, shadow, tagpool, tags);
+			if (q->arcidx == 0) {
+				x.state = q->tag.out;
+				x.tlook = tagpool.history.push(x.tlook, q->tag.info,
+					q->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
+				p = relax(x, done, shadow, tagpool, tags);
+				++q->arcidx;
+			}
 			break;
 		case nfa_state_t::RAN:
 		case nfa_state_t::FIN:
 			break;
 	}
+	return p;
 }
 
 void closure_posix(const closure_t &init, closure_t &done,
 	closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
 {
 	std::stack<nfa_state_t*>
-		&astack = tagpool.astack,
-		&bstack = tagpool.bstack;
+		&topsort = tagpool.astack,
+		&linear = tagpool.bstack;
+	nfa_state_t *q, *p;
 
-	// enqueue all initial states
 	done.clear();
 	if (shadow) shadow->clear();
+
+	// enqueue all initial states (there might be duplicates)
 	for (cclositer_t c = init.begin(); c != init.end(); ++c) {
-		enqueue(*c, bstack, done, shadow, tagpool, tags);
+		q = relax(*c, done, shadow, tagpool, tags);
+		if (q) {
+			topsort.push(q);
+			q->status = GOR_TOPSORT;
+		}
 	}
 
 	// Gordberg-Radzik 'shortest path' algorithm.
 	// Papers: 1993, "A heuristic improvement of the Bellman-Ford
 	// algorithm" by Goldberg, Radzik and 1996, Shortest paths algorithms:
-	// Theory andexperimental evaluation" by Cherkassky, Goldberg, Radzik.
+	// Theory and experimental evaluation" by Cherkassky, Goldberg, Radzik.
 	// Complexity for digraph G=(V,E) is O(|V|*|E|).
-	for (; !bstack.empty(); ) {
+	for (; !topsort.empty(); ) {
 
-		// 1st step: find admissible subgraph reachable from B-stack
+		// 1st pass: scan admissible subgraph reachable from B-stack
 		// and topologically sort it (this can be done by a single
-		// depth-first search that scans each state and pushes traversed
-		// states to A-stack in postorder)
-		for (; !bstack.empty(); ) {
-			nfa_state_t *n = bstack.top();
-			if (n->status == GOR_NEWPASS) {
-				n->status = GOR_TOPSORT;
-				scan(n, bstack, done, shadow, tagpool, tags);
-			} else if (n->status == GOR_TOPSORT) {
-				bstack.pop();
-				astack.push(n);
-			} else { // GOR_OFFSTACK
-				bstack.pop();
+		// depth-first postorder traversal)
+		for (; !topsort.empty(); ) {
+			q = topsort.top();
+			topsort.pop();
+
+			if (q->status != GOR_LINEAR) {
+				q->status = GOR_TOPSORT;
+
+				// find next admissible transition
+				while ((p = explore(q, done, shadow, tagpool, tags))
+					&& p->status != GOR_NOPASS) {
+					p->active = 1;
+				}
+
+				// follow the admissible transition
+				if (p) {
+					topsort.push(q);
+					topsort.push(p);
+					p->arcidx = 0;
+				}
+				// done with this state: all deps visited
+				else {
+					q->status = GOR_LINEAR;
+					linear.push(q);
+				}
 			}
 		}
 
-		// 2nd step: scan topologically ordered states from A-stack
+		// 2nd pass: scan topologically ordered states from A-stack
 		// and push head states of relaxed transitions to B-stack
-		for (; !astack.empty(); ) {
-			nfa_state_t *n = astack.top();
-			astack.pop();
-			scan(n, bstack, done, shadow, tagpool, tags);
-			n->status = GOR_OFFSTACK;
+		for (; !linear.empty(); ) {
+			q = linear.top();
+			linear.pop();
+
+			if (q->active) {
+				// scan admissible transitions
+				q->arcidx = 0;
+				while ((p = explore(q, done, shadow, tagpool, tags))) {
+					if (p->status == GOR_NOPASS) {
+						topsort.push(p);
+						p->arcidx = 0;
+					}
+					else if (p->status == GOR_LINEAR) {
+						p->active = 1;
+					}
+				}
+			}
+
+			q->status = GOR_NOPASS;
+			q->active = 0;
+			q->arcidx = 0;
 		}
 	}
 
-	// reset associated closure items and check status
-	// (do this before removing any states from closure)
+	// clean up (do this before removing any states from closure)
 	for (clositer_t i = done.begin(); i != done.end(); ++i) {
-		i->state->clos = NOCLOS;
-		assert(i->state->status == GOR_OFFSTACK);
+		q = i->state;
+		q->clos = NOCLOS;
+		assert(q->status == GOR_NOPASS && q->active == 0 && q->arcidx == 0);
 	}
 }
 
diff --git a/re2c/src/nfa/dump.cc b/re2c/src/nfa/dump.cc
index 4d418b8b..9c92c7ec 100644
--- a/re2c/src/nfa/dump.cc
+++ b/re2c/src/nfa/dump.cc
@@ -20,13 +20,13 @@ void dump_nfa(const nfa_t &nfa)
 	fprintf(stderr,
 		"digraph NFA {\n"
 		"  rankdir=LR\n"
-		"  node[shape=Mrecord fontname=fixed height=0.2 width=0.2]\n"
-		"  edge[arrowhead=vee fontname=fixed label=\" \"]\n\n");
+		"  node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n"
+		"  edge[arrowhead=vee fontname=Courier label=\" \"]\n\n");
 
 	for (uint32_t i = static_cast<uint32_t>(nfa.size); i --> 0;) {
 		const nfa_state_t *n = &nfa.states[i];
 
-		fprintf(stderr, "  n%u [label=\"%u\"]", i, i);
+		fprintf(stderr, "  n%u [label=\"%u(%d)\"]", i, i, n->indeg);
 		if (n->type == nfa_state_t::FIN) {
 			fprintf(stderr, " [fillcolor=gray]");
 		}
diff --git a/re2c/src/nfa/nfa.h b/re2c/src/nfa/nfa.h
index 9dcd7561..7951255b 100644
--- a/re2c/src/nfa/nfa.h
+++ b/re2c/src/nfa/nfa.h
@@ -18,7 +18,7 @@ namespace re2c
 struct clos_t;
 
 // Goldberg-Radzik 'shortest path' algorithm
-enum gor_status_t {GOR_OFFSTACK, GOR_NEWPASS, GOR_TOPSORT};
+enum gor_status_t {GOR_NOPASS = 0u, GOR_TOPSORT = 1u, GOR_LINEAR = 2u};
 
 static const uint32_t NOCLOS = ~0u;
 
@@ -49,26 +49,37 @@ struct nfa_state_t
 		} nil;
 	};
 	size_t rule;
+
+	// stuff needed for GOR1
 	uint32_t clos;
-	gor_status_t status;
+	gor_status_t status : 2;  // values 0, 1, 2
+	uint32_t arcidx     : 2;  // maximum out-dergee is 2
+	uint32_t active     : 1;  // boolean
+	uint32_t indeg      : 27; // the rest; we are unlikely to have more than 2^27 states
+
+	void init(size_t r)
+	{
+		rule = r;
+		clos = NOCLOS;
+		status = GOR_NOPASS;
+		arcidx = 0;
+		active = 0;
+		indeg = 0;
+	}
 
 	void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2)
 	{
 		type = ALT;
 		alt.out1 = s1;
 		alt.out2 = s2;
-		rule = r;
-		clos = NOCLOS;
-		status = GOR_OFFSTACK;
+		init(r);
 	}
 	void make_ran(size_t r, nfa_state_t *s, const Range *p)
 	{
 		type = RAN;
 		ran.out = s;
 		ran.ran = p;
-		rule = r;
-		clos = NOCLOS;
-		status = GOR_OFFSTACK;
+		init(r);
 	}
 	void make_tag(size_t r, nfa_state_t *s, size_t i, bool bottom)
 	{
@@ -76,24 +87,18 @@ struct nfa_state_t
 		tag.out = s;
 		tag.info = i;
 		tag.bottom = bottom;
-		rule = r;
-		clos = NOCLOS;
-		status = GOR_OFFSTACK;
+		init(r);
 	}
 	void make_fin(size_t r)
 	{
 		type = FIN;
-		rule = r;
-		clos = NOCLOS;
-		status = GOR_OFFSTACK;
+		init(r);
 	}
 	void make_nil(size_t r, nfa_state_t *s)
 	{
 		type = NIL;
 		nil.out = s;
-		rule = r;
-		clos = NOCLOS;
-		status = GOR_OFFSTACK;
+		init(r);
 	}
 };
 
diff --git a/re2c/src/nfa/re_to_nfa.cc b/re2c/src/nfa/re_to_nfa.cc
index e2d1f19a..0153a16b 100644
--- a/re2c/src/nfa/re_to_nfa.cc
+++ b/re2c/src/nfa/re_to_nfa.cc
@@ -89,6 +89,29 @@ static nfa_state_t *re_to_nfa(nfa_t &nfa, size_t nrule, const RE *re, nfa_state_
 	return s;
 }
 
+void calc_indegrees(nfa_state_t *n)
+{
+	++n->indeg;
+	if (n->indeg > 1) return;
+
+	switch (n->type) {
+		case nfa_state_t::NIL:
+			calc_indegrees(n->nil.out);
+			break;
+		case nfa_state_t::ALT:
+			calc_indegrees(n->alt.out1);
+			calc_indegrees(n->alt.out2);
+			break;
+		case nfa_state_t::TAG:
+			calc_indegrees(n->tag.out);
+			break;
+		case nfa_state_t::RAN:
+			calc_indegrees(n->ran.out);
+		case nfa_state_t::FIN:
+			break;
+	}
+}
+
 nfa_t::nfa_t(const RESpec &spec)
 	: max_size(estimate_size(spec.res))
 	, size(0)
@@ -115,6 +138,8 @@ nfa_t::nfa_t(const RESpec &spec)
 			root = s;
 		}
 	}
+
+	calc_indegrees(root);
 }
 
 nfa_t::~nfa_t()
-- 
2.40.0