namespace re2c
{
-static void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
- nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow, std::valarray<Rule> &rules);
+static void raw_closure(const closure_t &clos1, closure_t &clos, closure_t *shadow,
+ Tagpool &tagpool, const std::vector<Tag> &tags, std::valarray<Rule> &rules);
static bool better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, const std::vector<Tag> &tags);
static void lower_lookahead_to_transition(closure_t &clos);
static tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
const std::vector<Tag> &tags)
{
// build tagged epsilon-closure of the given set of NFA states
- clos2.clear();
- if (shadow) shadow->clear();
- for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) {
- closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules);
- }
+ raw_closure(clos1, clos2, shadow, tagpool, tags, rules);
orders(clos2, tagpool, tags);
* to leftmost strategy; orbit tags are compared by order and by tagged
* epsilon-paths so that earlier iterations are maximized).
*/
-void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
- nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow,
- std::valarray<Rule> &rules)
-{
- if (n->loop) return;
- local_increment_t<uint8_t> _(n->loop);
- tagtree_t &tagtree = tagpool.history;
- clositer_t c = clos.begin(), e = clos.end();
- switch (n->type) {
+static void indegree(nfa_state_t *s) // count epsilon in-edges of s; on the first visit also descend into its NIL/ALT/TAG successors
+{
+ ++s->indeg; // working counter, decremented again by enqueue()
+ ++s->indeg_backup; // same count kept aside so that enqueue() can restore indeg
+ if (s->indeg > 1) return; // s was counted before (counters are reset to zero at the end of raw_closure), so its successors were already visited
+ switch (s->type) {
case nfa_state_t::NIL:
- closure_one(clos, tagpool, c0, n->nil.out, tags, shadow, rules);
- return;
+ indegree(s->nil.out);
+ break;
case nfa_state_t::ALT:
- closure_one(clos, tagpool, c0, n->alt.out1, tags, shadow, rules);
- closure_one(clos, tagpool, c0, n->alt.out2, tags, shadow, rules);
- return;
+ indegree(s->alt.out1);
+ indegree(s->alt.out2);
+ break;
case nfa_state_t::TAG:
- tagtree.push(n->tag.info, n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
- closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules);
- tagtree.pop();
- return;
- case nfa_state_t::RAN:
- for (; c != e && c->state != n; ++c);
+ indegree(s->tag.out);
break;
- case nfa_state_t::FIN:
- // see note [at most one final item per closure]
- for (; c != e && c->state->type != nfa_state_t::FIN; ++c);
- if (c != e && c->state != n) {
- rules[n->rule].shadow.insert(rules[c->state->rule].code->fline);
- return;
- }
+ default: // remaining state types: nothing is followed from here
break;
}
+}
- clos_t c2 = {c0.origin, n, c0.tvers, c0.ttran,
- tagtree.tail, c0.order, c0.index++};
+/*
+ * If there is an epsilon-loop through initial closure states X and Y,
+ * then in-degree of both X and Y in queue is non-zero; whichever of them
+ * is popped out of queue first (say, X) may lead to an epsilon-loop through
+ * Y back to X, reducing Y's in-degree before epsilon-path starting in Y is
+ * inspected. In such unfortunate cases we have to reinstate Y's original
+ * in-degree and repeat all the work.
+ *
+ * Paths with epsilon-loops will be terminated: by the time they are added
+ * to the queue, the resulting closure must already contain a non-looping path
+ * for the same state, so the looping path must be compared to the old one.
+ * This comparison will favour the non-looping path with both POSIX and
+ * leftmost policies. With leftmost, the non-looping history will dominate,
+ * since it is a prefix of the looping history. With POSIX, either the
+ * histories are equal for all tags and there's no point in adding an
+ * identical path to the queue, or the histories of some orbit tag are not
+ * equal and the shorter orbit history dominates.
+ */
+static void enqueue(closure_t &todo, closure_t &done, closure_t *shadow,
+ clos_t x, Tagpool &tagpool, const std::vector<Tag> &tags) // record item x in done (one item per NFA state) and schedule it in todo; shadow, if non-null, collects the losing items
+{
+ nfa_state_t *n = x.state;
+ clositer_t e, c;
+
+ if (n->indeg == 0) n->indeg = n->indeg_backup; // counter exhausted: reinstate the original in-degree before counting this arrival
+ --n->indeg; // one more incoming path to n has now been seen
+
+ c = done.begin(); e = done.end();
+ for(; c != e && c->state != n; ++c); // linear search for an item with the same state
if (c == e) {
- clos.push_back(c2);
+ done.push_back(x); // first path that reaches n
+ } else if (better(*c, x, tagpool, tags)) { // a true result is treated as "x wins over the stored item"
+ if (shadow) shadow->push_back(*c);
+ *c = x;
} else {
- clos_t &c1 = *c;
- if (better(c1, c2, tagpool, tags)) std::swap(c1, c2);
- if (shadow) shadow->push_back(c2);
+ if (shadow) shadow->push_back(x);
+ return; // x lost: the stored item stays and x is not scheduled
+ }
+
+ c = todo.begin(); e = todo.end();
+ for(; c != e && c->state != n; ++c); // is n already waiting in the queue?
+ if (c == e) {
+ todo.push_back(x);
+ } else if (better(*c, x, tagpool, tags)) {
+ std::swap(*c, x); // replace the queued item; x is a by-value parameter, so the swapped-out copy is simply dropped
+ }
+}
+
+void raw_closure(const closure_t &clos1, closure_t &done, closure_t *shadow,
+ Tagpool &tagpool, const std::vector<Tag> &tags, std::valarray<Rule> &rules) // fill done with the items reachable from clos1 through NIL/ALT/TAG states, then trim it; shadow (optional) receives rejected items
+{
+ closure_t todo; // work queue of items whose successors still have to be expanded
+ tagtree_t &history = tagpool.history;
+
+ // initialize in-degree of NFA states in this epsilon-closure
+ // (outer NFA transitions do not contribute to in-degree)
+ for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
+ indegree(c->state);
+ }
+
+ // enqueue all initial states
+ done.clear();
+ if (shadow) shadow->clear();
+ for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
+ enqueue(todo, done, shadow, *c, tagpool, tags);
+ }
+
+ while (!todo.empty()) {
+
+ // find state with the least in-degree and remove it from queue
+ clositer_t c = todo.begin(), e = todo.end(), c0 = c;
+ for (; c != e; ++c) {
+ if (c0->state->indeg == 0) break; // zero cannot be improved upon, stop scanning
+ if (c0->state->indeg > c->state->indeg) c0 = c;
+ }
+ clos_t x = *c0;
+ *c0 = todo.back(); todo.pop_back(); // "quick" removal: overwrite with the last item, order inside todo is not preserved
+
+ // enqueue child NFA states
+ nfa_state_t *n = x.state;
+ switch (n->type) {
+ default: break; // other state types: nothing to enqueue
+ case nfa_state_t::NIL:
+ x.state = n->nil.out;
+ enqueue(todo, done, shadow, x, tagpool, tags);
+ break;
+ case nfa_state_t::ALT:
+ x.state = n->alt.out1;
+ x.index = history.push(x.index, Tag::RIGHTMOST, 1); // first alternative is recorded with value 1
+ enqueue(todo, done, shadow, x, tagpool, tags);
+ x.state = n->alt.out2;
+ x.index = history.push(x.index, Tag::RIGHTMOST, 0); // NOTE(review): x.index was overwritten for out1 just above, so this node is pushed on top of out1's node rather than on the index x had when popped; confirm this is intended
+ enqueue(todo, done, shadow, x, tagpool, tags);
+ break;
+ case nfa_state_t::TAG:
+ x.state = n->tag.out;
+ x.tlook = history.push(x.tlook, n->tag.info,
+ n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR); // extend the lookahead tag history with this tag
+ enqueue(todo, done, shadow, x, tagpool, tags);
+ break;
+ }
+ }
+
+ // reset in-degree to zero (before removing any states from closure)
+ for (clositer_t c = done.begin(); c != done.end(); ++c) {
+ c->state->indeg = c->state->indeg_backup = 0;
+ }
+
+ // drop "inner" states (non-final without outgoing non-epsilon transitions)
+ clositer_t b = done.begin(), e = done.end(), f;
+ f = std::partition(b, e, clos_t::ran); // [b, f): items accepted by clos_t::ran
+ e = std::partition(f, e, clos_t::fin); // [f, e): items accepted by clos_t::fin
+ done.resize(static_cast<size_t>(e - b)); // NOTE(review): b, f and e are used again below; this relies on a shrinking resize leaving them usable (presumably closure_t is a std::vector, confirm)
+
+ // drop all final states except one; mark dropped rules as shadowed
+ // see note [at most one final item per closure]
+ if (e != f) {
+ std::sort(f, e, cmpby_rule_state); // after sorting, the item at f is the one that is kept
+ const uint32_t l = rules[f->state->rule].code->fline; // fline of the kept rule's code, recorded in every dropped rule
+ for (clositer_t c = f; ++c < e;) { // starts at f + 1: f itself is not shadowed
+ rules[c->state->rule].shadow.insert(l);
+ }
+ done.resize(static_cast<size_t>(f - b) + 1); // keep [b, f) plus the single item at f
}
}
if (x < y) return false;
if (x > y) return true;
- const int cmp = tagtree.compare_orbits(l1, l2, t);
+ const int cmp = tagtree.compare_last(l1, l2, t);
if (cmp < 0) return false;
if (cmp > 0) return true;
if (x < y) return false;
if (x > y) return true;
- size_t i = c1.index, j = c2.index;
- if (i < j) return false;
- if (i > j) return true;
+ const int cmp = tagtree.compare_full(c1.index, c2.index, Tag::RIGHTMOST);
+ if (cmp < 0) return false;
+ if (cmp > 0) return true;
+
assert(false); // all indexes are different
}
}
* This part of the algorithm was invented by Christopher Kuklewicz.
*/
-typedef std::pair<tagver_t, hidx_t> key1_t;
-struct cmp_t
+typedef std::pair<tagver_t, hidx_t> key_t; // (tag version, history index) pair used as the std::set key with the comparators below; NOTE(review): key_t is also a POSIX type name, confirm it does not clash on the target platforms
+struct cmp_orbit_t // orders keys by first; ties are broken by tree.compare_last() on second for the given tag
{
tagtree_t &tree;
size_t tag;
- bool operator()(const key1_t &x, const key1_t &y)
+ bool operator()(const key_t &x, const key_t &y) // NOTE(review): not const-qualified; some standard libraries require set comparators to be callable on a const object, consider adding const
+ {
+ if (x.first < y.first) return true;
+ if (x.first > y.first) return false;
+ return tree.compare_last(x.second, y.second, tag) < 0; // equal first: decide by the last subhistory of this tag
+ }
+};
+struct cmp_leftmost_t // orders keys by first; ties are broken by tree.compare_full() on second for Tag::RIGHTMOST
+{
+ tagtree_t &tree;
+ bool operator()(const key_t &x, const key_t &y) // NOTE(review): not const-qualified, same remark as for cmp_orbit_t
{
if (x.first < y.first) return true;
if (x.first > y.first) return false;
- return tree.compare_orbits(x.second, y.second, tag) < 0;
+ return tree.compare_full(x.second, y.second, Tag::RIGHTMOST) < 0; // equal first: decide by the whole Tag::RIGHTMOST subhistory
}
};
// see note [POSIX disambiguation]
if (orbit(tags[t])) {
- const cmp_t cmp = {tagtree, t};
- std::set<key1_t, cmp_t> keys1(cmp);
+ const cmp_orbit_t cmp = {tagtree, t};
+ std::set<key_t, cmp_orbit_t> keys(cmp);
for (c = b; c != e; ++c) {
- keys1.insert(key1_t(tagpool[c->order][t], c->tlook));
+ keys.insert(key_t(tagpool[c->order][t], c->tlook));
}
for (c = b; c != e; ++c, o += ntag) {
- const ptrdiff_t d = std::distance(keys1.begin(),
- keys1.find(key1_t(tagpool[c->order][t], c->tlook)));
+ const ptrdiff_t d = std::distance(keys.begin(),
+ keys.find(key_t(tagpool[c->order][t], c->tlook)));
o[t] = static_cast<tagver_t>(d);
}
// equals position of this item in leftmost NFA traversal
// (it's the same for all tags)
} else {
- typedef std::pair<tagver_t, size_t> key2_t;
- std::set<key2_t> keys2;
+ const cmp_leftmost_t cmp = {tagtree};
+ std::set<key_t, cmp_leftmost_t> keys(cmp);
for (c = b; c != e; ++c) {
- keys2.insert(key2_t(tagpool[c->order][t], c->index));
+ keys.insert(key_t(tagpool[c->order][t], c->index));
}
for (c = b; c != e; ++c, o += ntag) {
- const ptrdiff_t d = std::distance(keys2.begin(),
- keys2.find(key2_t(tagpool[c->order][t], c->index)));
+ const ptrdiff_t d = std::distance(keys.begin(),
+ keys.find(key_t(tagpool[c->order][t], c->index)));
o[t] = static_cast<tagver_t>(d);
}
}
namespace re2c
{
-tagtree_t::tagtree_t(): nodes(), tail(HROOT), path1(), path2() {}
+tagtree_t::tagtree_t(): nodes(), path1(), path2() {}
tagver_t tagtree_t::elem(hidx_t i) const { return nodes[i].elem; }
size_t tagtree_t::tag(hidx_t i) const { return nodes[i].tag; }
-void tagtree_t::push(size_t t, tagver_t v)
+hidx_t tagtree_t::push(hidx_t i, size_t t, tagver_t v) // append a node for tag t with value v attached to node i and return the new node's index
{
- node_t x = {tail, v, t};
+ node_t x = {i, v, t}; // first field is presumably the predecessor link (the removed code stored the former tail there)
nodes.push_back(x);
- tail = static_cast<hidx_t>(nodes.size() - 1);
-}
-
-void tagtree_t::pop()
-{
- // don't destroy the leaf itself, just update pointer to current leaf
- // (pointer to the the old leaf is stored in one of the closure items)
- tail = pred(tail);
+ return static_cast<hidx_t>(nodes.size() - 1); // index of the node just appended; existing nodes are never modified here
}
// cut out subhistory of this tag (just skip all other tags)
-static void subhistory(const tagtree_t &history,
+static void full_subhistory(const tagtree_t &history,
std::vector<tagver_t> &path, hidx_t idx, size_t tag)
{
path.clear();
}
}
-// cut out a list of subhistories of this tag separated by tags
-// with higher priority (in POSIX they correspond to outer captures)
-static void subhistories(const tagtree_t &history,
+// the last subhistory of this tag: walking back from idx, it begins at the
+// first occurrence of this tag and ends at the next occurrence of a tag with
+// higher priority (in POSIX they correspond to outer captures) or when the
+// whole history ends; the values are stored in path in that backward order
+static void last_subhistory(const tagtree_t &history,
std::vector<tagver_t> &path, hidx_t idx, size_t tag)
{
- // 0 -- bottom, 1 -- cursor, 2 -- subhistory delimiter, so that
- // short history which is a prefix of a longer history dominates
path.clear();
- for (hidx_t i = idx;;) {
-
- // subhistory begins at the next occurence of this tag
- for (; i != HROOT && history.tag(i) != tag; i = history.pred(i));
- if (i == HROOT) break;
- path.push_back(2);
-
- // subhistory ends at the next occurence of tag with
- // higher priority or when the whole history ends
- for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) {
- // skip tags with lower priority
- if (history.tag(i) > tag) continue;
- path.push_back(history.elem(i) == TAGVER_CURSOR ? 1 : 0);
+ hidx_t i = idx;
+ for (; i != HROOT && history.tag(i) != tag; i = history.pred(i)); // skip back to the most recent node of this tag (path stays empty if there is none)
+ for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) { // stop at the first node with a smaller tag number or at the root
+ if (history.tag(i) == tag) { // nodes with a larger tag number are passed over
+ path.push_back(history.elem(i)); // raw tag version is stored
}
}
}
return 0;
}
-int32_t tagtree_t::compare_actions(hidx_t x, hidx_t y, size_t t)
+int32_t tagtree_t::compare_full(hidx_t x, hidx_t y, size_t t) // compare histories x and y by their full subhistory of tag t; the result is whatever compare_reversed() returns
{
- subhistory(*this, path1, x, t);
- subhistory(*this, path2, y, t);
+ full_subhistory(*this, path1, x, t); // member buffer path1 is reused as scratch space
+ full_subhistory(*this, path2, y, t); // member buffer path2 is reused as scratch space
return compare_reversed(path1, path2);
}
-int32_t tagtree_t::compare_orbits(hidx_t x, hidx_t y, size_t t)
+int32_t tagtree_t::compare_last(hidx_t x, hidx_t y, size_t t) // compare histories x and y by their last subhistory of tag t; the result is whatever compare_reversed() returns
{
- subhistories(*this, path1, x, t);
- subhistories(*this, path2, y, t);
+ last_subhistory(*this, path1, x, t); // member buffer path1 is reused as scratch space
+ last_subhistory(*this, path2, y, t); // member buffer path2 is reused as scratch space
return compare_reversed(path1, path2);
}