Avoid exponential blowup in tagged epsilon-closure construction.

author Ulya Trofimovich <skvadrik@gmail.com>

Tue, 16 May 2017 17:07:00 +0000 (18:07 +0100)

committer Ulya Trofimovich <skvadrik@gmail.com>

Tue, 16 May 2017 17:46:08 +0000 (18:46 +0100)
author Ulya Trofimovich <skvadrik@gmail.com>
Tue, 16 May 2017 17:07:00 +0000 (18:07 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Tue, 16 May 2017 17:46:08 +0000 (18:46 +0100)
diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc

index 37e32bd2ce982b570a0a9d998a9539c49a6f4ade..1f80e180265e3fa05d5c7d74d2360e687aa697b8 100644 (file)
--- a/re2c/src/dfa/closure.cc
+++ b/re2c/src/dfa/closure.cc
@@ -7,8 +7,8 @@
  namespace re2c
  {
  
-static void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
-       nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow, std::valarray<Rule> &rules);
+static void raw_closure(const closure_t &clos1, closure_t &clos, closure_t *shadow,
+       Tagpool &tagpool, const std::vector<Tag> &tags, std::valarray<Rule> &rules);
  static bool better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, const std::vector<Tag> &tags);
  static void lower_lookahead_to_transition(closure_t &clos);
  static tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
@@ -22,11 +22,7 @@ tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
         const std::vector<Tag> &tags)
  {
         // build tagged epsilon-closure of the given set of NFA states
-       clos2.clear();
-       if (shadow) shadow->clear();
-       for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) {
-               closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules);
-       }
+       raw_closure(clos1, clos2, shadow, tagpool, tags, rules);
  
         orders(clos2, tagpool, tags);
  
@@ -73,49 +69,150 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y)
   * to leftmost strategy; orbit tags are compared by order and by tagged
   * epsilon-paths so that earlier iterations are maximized).
   */
-void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
-       nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow,
-       std::valarray<Rule> &rules)
-{
-       if (n->loop) return;
-       local_increment_t<uint8_t> _(n->loop);
  
-       tagtree_t &tagtree = tagpool.history;
-       clositer_t c = clos.begin(), e = clos.end();
-       switch (n->type) {
+static void indegree(nfa_state_t *s)
+{
+       ++s->indeg;
+       ++s->indeg_backup;
+       if (s->indeg > 1) return;
+       switch (s->type) {
                 case nfa_state_t::NIL:
-                       closure_one(clos, tagpool, c0, n->nil.out, tags, shadow, rules);
-                       return;
+                       indegree(s->nil.out);
+                       break;
                 case nfa_state_t::ALT:
-                       closure_one(clos, tagpool, c0, n->alt.out1, tags, shadow, rules);
-                       closure_one(clos, tagpool, c0, n->alt.out2, tags, shadow, rules);
-                       return;
+                       indegree(s->alt.out1);
+                       indegree(s->alt.out2);
+                       break;
                 case nfa_state_t::TAG:
-                       tagtree.push(n->tag.info, n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
-                       closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules);
-                       tagtree.pop();
-                       return;
-               case nfa_state_t::RAN:
-                       for (; c != e && c->state != n; ++c);
+                       indegree(s->tag.out);
                         break;
-               case nfa_state_t::FIN:
-                       // see note [at most one final item per closure]
-                       for (; c != e && c->state->type != nfa_state_t::FIN; ++c);
-                       if (c != e && c->state != n) {
-                               rules[n->rule].shadow.insert(rules[c->state->rule].code->fline);
-                               return;
-                       }
+               default:
                         break;
         }
+}
  
-       clos_t c2 = {c0.origin, n, c0.tvers, c0.ttran,
-               tagtree.tail, c0.order, c0.index++};
+/*
+ * If there is an epsilon-loop through initial closure states X and Y,
+ * then in-degree of both X and Y in queue is non-zero; whichever of them
+ * is popped out of queue first (say, X) may lead to an epsilon-loop through
+ * Y back to X, reducing Y's in-degree before epsilon-path starting in Y is
+ * inspected. In such unfortunate cases we have to reinstate Y's original
+ * in-degree and repeat all the work.
+ *
+ * Paths with epsilon-loops will be terminated: by the time they are added
+ * to queue, the resulting closure must already contain a non-looping path
+ * for the same state, so the looping path must be compared to the old one.
+ * This comparison will favour the non-looping path under both POSIX and
+ * leftmost policies. With leftmost, the non-looping history dominates, since
+ * it is a prefix of the looping history. With POSIX, either the histories are
+ * equal for all tags (so there is no point in adding an identical path to the
+ * queue), or the histories of some orbit tag differ and the shorter one dominates.
+ */
+static void enqueue(closure_t &todo, closure_t &done, closure_t *shadow,
+       clos_t x, Tagpool &tagpool, const std::vector<Tag> &tags)
+{
+       nfa_state_t *n = x.state;
+       clositer_t e, c;
+
+       if (n->indeg == 0) n->indeg = n->indeg_backup;
+       --n->indeg;
+
+       c = done.begin(); e = done.end();
+       for(; c != e && c->state != n; ++c);
         if (c == e) {
-               clos.push_back(c2);
+               done.push_back(x);
+       } else if (better(*c, x, tagpool, tags)) {
+               if (shadow) shadow->push_back(*c);
+               *c = x;
         } else {
-               clos_t &c1 = *c;
-               if (better(c1, c2, tagpool, tags)) std::swap(c1, c2);
-               if (shadow) shadow->push_back(c2);
+               if (shadow) shadow->push_back(x);
+               return;
+       }
+
+       c = todo.begin(); e = todo.end();
+       for(; c != e && c->state != n; ++c);
+       if (c == e) {
+               todo.push_back(x);
+       } else if (better(*c, x, tagpool, tags)) {
+               std::swap(*c, x);
+       }
+}
+
+void raw_closure(const closure_t &clos1, closure_t &done, closure_t *shadow,
+       Tagpool &tagpool, const std::vector<Tag> &tags, std::valarray<Rule> &rules)
+{
+       closure_t todo;
+       tagtree_t &history = tagpool.history;
+
+       // initialize in-degree of NFA states in this epsilon-closure
+       // (outer NFA transitions do not contribute to in-degree)
+       for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
+               indegree(c->state);
+       }
+
+       // enqueue all initial states
+       done.clear();
+       if (shadow) shadow->clear();
+       for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
+               enqueue(todo, done, shadow, *c, tagpool, tags);
+       }
+
+       while (!todo.empty()) {
+
+               // find state with the least in-degree and remove it from queue
+               clositer_t c = todo.begin(), e = todo.end(), c0 = c;
+               for (; c != e; ++c) {
+                       if (c0->state->indeg == 0) break;
+                       if (c0->state->indeg > c->state->indeg) c0 = c;
+               }
+               clos_t x = *c0;
+               *c0 = todo.back(); todo.pop_back(); // "quick" removal
+
+               // enqueue child NFA states
+               nfa_state_t *n = x.state;
+               switch (n->type) {
+                       default: break;
+                       case nfa_state_t::NIL:
+                               x.state = n->nil.out;
+                               enqueue(todo, done, shadow, x, tagpool, tags);
+                               break;
+                       case nfa_state_t::ALT:
+                               x.state = n->alt.out1;
+                               x.index = history.push(x.index, Tag::RIGHTMOST, 1);
+                               enqueue(todo, done, shadow, x, tagpool, tags);
+                               x.state = n->alt.out2;
+                               x.index = history.push(x.index, Tag::RIGHTMOST, 0);
+                               enqueue(todo, done, shadow, x, tagpool, tags);
+                               break;
+                       case nfa_state_t::TAG:
+                               x.state = n->tag.out;
+                               x.tlook = history.push(x.tlook, n->tag.info,
+                                       n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
+                               enqueue(todo, done, shadow, x, tagpool, tags);
+                               break;
+               }
+       }
+
+       // reset in-degree to zero (before removing any states from closure)
+       for (clositer_t c = done.begin(); c != done.end(); ++c) {
+               c->state->indeg = c->state->indeg_backup = 0;
+       }
+
+       // drop "inner" states (non-final without outgoing non-epsilon transitions)
+       clositer_t b = done.begin(), e = done.end(), f;
+       f = std::partition(b, e, clos_t::ran);
+       e = std::partition(f, e, clos_t::fin);
+       done.resize(static_cast<size_t>(e - b));
+
+       // drop all final states except one; mark dropped rules as shadowed
+       // see note [at most one final item per closure]
+       if (e != f) {
+               std::sort(f, e, cmpby_rule_state);
+               const uint32_t l = rules[f->state->rule].code->fline;
+               for (clositer_t c = f; ++c < e;) {
+                       rules[c->state->rule].shadow.insert(l);
+               }
+               done.resize(static_cast<size_t>(f - b) + 1);
         }
  }
  
@@ -164,7 +261,7 @@ bool better(const clos_t &c1, const clos_t &c2,
                         if (x < y) return false;
                         if (x > y) return true;
  
-                       const int cmp = tagtree.compare_orbits(l1, l2, t);
+                       const int cmp = tagtree.compare_last(l1, l2, t);
                         if (cmp < 0) return false;
                         if (cmp > 0) return true;
  
@@ -199,9 +296,10 @@ bool better(const clos_t &c1, const clos_t &c2,
                         if (x < y) return false;
                         if (x > y) return true;
  
-                       size_t i = c1.index, j = c2.index;
-                       if (i < j) return false;
-                       if (i > j) return true;
+                       const int cmp = tagtree.compare_full(c1.index, c2.index, Tag::RIGHTMOST);
+                       if (cmp < 0) return false;
+                       if (cmp > 0) return true;
+
                         assert(false); // all indexes are different
                 }
         }
@@ -352,16 +450,26 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
   * This part of the algorithm was invented by Christopher Kuklewicz.
   */
  
-typedef std::pair<tagver_t, hidx_t> key1_t;
-struct cmp_t
+typedef std::pair<tagver_t, hidx_t> key_t;
+struct cmp_orbit_t
  {
         tagtree_t &tree;
         size_t tag;
-       bool operator()(const key1_t &x, const key1_t &y)
+       bool operator()(const key_t &x, const key_t &y)
+       {
+               if (x.first < y.first) return true;
+               if (x.first > y.first) return false;
+               return tree.compare_last(x.second, y.second, tag) < 0;
+       }
+};
+struct cmp_leftmost_t
+{
+       tagtree_t &tree;
+       bool operator()(const key_t &x, const key_t &y)
         {
                 if (x.first < y.first) return true;
                 if (x.first > y.first) return false;
-               return tree.compare_orbits(x.second, y.second, tag) < 0;
+               return tree.compare_full(x.second, y.second, Tag::RIGHTMOST) < 0;
         }
  };
  
@@ -386,14 +494,14 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
  
                 // see note [POSIX disambiguation]
                 if (orbit(tags[t])) {
-                       const cmp_t cmp = {tagtree, t};
-                       std::set<key1_t, cmp_t> keys1(cmp);
+                       const cmp_orbit_t cmp = {tagtree, t};
+                       std::set<key_t, cmp_orbit_t> keys(cmp);
                         for (c = b; c != e; ++c) {
-                               keys1.insert(key1_t(tagpool[c->order][t], c->tlook));
+                               keys.insert(key_t(tagpool[c->order][t], c->tlook));
                         }
                         for (c = b; c != e; ++c, o += ntag) {
-                               const ptrdiff_t d = std::distance(keys1.begin(),
-                                       keys1.find(key1_t(tagpool[c->order][t], c->tlook)));
+                               const ptrdiff_t d = std::distance(keys.begin(),
+                                       keys.find(key_t(tagpool[c->order][t], c->tlook)));
                                 o[t] = static_cast<tagver_t>(d);
                         }
  
@@ -401,14 +509,14 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                 // equals position of this item in leftmost NFA traversal
                 // (it's the same for all tags)
                 } else {
-                       typedef std::pair<tagver_t, size_t> key2_t;
-                       std::set<key2_t> keys2;
+                       const cmp_leftmost_t cmp = {tagtree};
+                       std::set<key_t, cmp_leftmost_t> keys(cmp);
                         for (c = b; c != e; ++c) {
-                               keys2.insert(key2_t(tagpool[c->order][t], c->index));
+                               keys.insert(key_t(tagpool[c->order][t], c->index));
                         }
                         for (c = b; c != e; ++c, o += ntag) {
-                               const ptrdiff_t d = std::distance(keys2.begin(),
-                                       keys2.find(key2_t(tagpool[c->order][t], c->index)));
+                               const ptrdiff_t d = std::distance(keys.begin(),
+                                       keys.find(key_t(tagpool[c->order][t], c->index)));
                                 o[t] = static_cast<tagver_t>(d);
                         }
                 }
diff --git a/re2c/src/dfa/closure.h b/re2c/src/dfa/closure.h

index e6cd4d115fa492778ad72005123e1892b36d9aac..8e9663ed3d7bfb40783237a61518c58aec6f2d04 100644 (file)
--- a/re2c/src/dfa/closure.h
+++ b/re2c/src/dfa/closure.h
@@ -14,13 +14,14 @@ struct clos_t
  {
         nfa_state_t *origin; // for debug only
         nfa_state_t *state;
+       size_t order; // vector of orders
         size_t tvers; // vector of tag versions (including lookahead tags)
         hidx_t ttran; // history of transition tags
         hidx_t tlook; // history of lookahead tags
-       size_t order; // vector of orders
-       size_t index; // leftmost order in NFA traversal
+       hidx_t index; // history of left/right alternatives in NFA traversal
  
         static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
+       static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
  };
  
  typedef std::vector<clos_t> closure_t;
@@ -45,7 +46,7 @@ struct newver_cmp_t
                 if (x.base < y.base) return true;
                 if (x.base > y.base) return false;
  
-               return history.compare_actions(x.history, y.history, x.tag) < 0;
+               return history.compare_full(x.history, y.history, x.tag) < 0;
         }
  };
  
diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc

index 868603ca2123ccdd679d0bd61c1cd8aa4a40f713..00c376def2a55a4b8e271a10380c04ae227649d0 100644 (file)
--- a/re2c/src/dfa/determinization.cc
+++ b/re2c/src/dfa/determinization.cc
@@ -41,8 +41,8 @@ void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
                 nfa_state_t *s1 = kernel->state[i],
                         *s2 = transition(s1, symbol);
                 if (s2) {
-                       clos_t c = {s1, s2, kernel->tvers[i], kernel->tlook[i],
-                               HROOT, kernel->order[i], 0};
+                       clos_t c = {s1, s2, kernel->order[i], kernel->tvers[i],
+                               kernel->tlook[i], HROOT, HROOT};
                         clos.push_back(c);
                 }
         }
@@ -86,7 +86,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
         // build tagged epsilon-closure of all reachable NFA states,
         // then find identical or mappable DFA state or add a new one
  
-       clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, HROOT, HROOT, ZERO_TAGS, 0};
+       clos_t c0 = {NULL, nfa.root, ZERO_TAGS, INITIAL_TAGS, HROOT, HROOT, HROOT};
         clos1.push_back(c0);
         acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
         find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump);
diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc

index e7a43a4bcbe1d5f65321f29ee45af550dd2db054..195cae7de5c02af38acd708b6b671458b4d795cc 100644 (file)
--- a/re2c/src/dfa/dump.cc
+++ b/re2c/src/dfa/dump.cc
@@ -50,7 +50,7 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
  
         const Tag &t = dfa.tags[h.tag(i)];
         const tagver_t v = h.elem(i);
-       if (v == TAGVER_BOTTOM) fprintf(stderr, "<O>");
+       if (v == TAGVER_BOTTOM) fprintf(stderr, "-");
         if (capture(t)) {
                 fprintf(stderr, "%u_", (uint32_t)t.ncap);
         } else if (trailing(t)) {
@@ -58,7 +58,6 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
         } else {
                 fprintf(stderr, "%s", t.name->c_str());
         }
-       if (v == TAGVER_BOTTOM) fprintf(stderr, "</O>");
         fprintf(stderr, " ");
  }
  
@@ -73,6 +72,7 @@ void dump_dfa_t::closure_tags(cclositer_t c)
  
         for (size_t t = 0; t < ntag; ++t) {
                 fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
+               fprintf(stderr, "[%d]", tagpool[c->order][t]);
         }
  
         if (l != HROOT) {
diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc

index f5966e7e4d4a90afd172a7559cc18baf3d6ab2b5..f43a6cc265da668d6b56741c0ffe7a59d6671399 100644 (file)
--- a/re2c/src/dfa/find_state.cc
+++ b/re2c/src/dfa/find_state.cc
@@ -45,7 +45,7 @@ static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
                 for (size_t t = 0; t < tagpool.ntags; ++t) {
                         if (history(tags[t])) {
                                 // compare subhistories
-                               if (h.compare_actions(xl, yl, t) != 0) return false;
+                               if (h.compare_full(xl, yl, t) != 0) return false;
                         } else {
                                 // compare only the last tags
                                 if (h.last(xl, t) != h.last(yl, t)) return false;
diff --git a/re2c/src/dfa/tagtree.cc b/re2c/src/dfa/tagtree.cc

index 471ca4ceb784f0a37905260264cbddfbec261e2d..801650a5f432924fc7167082fa347d072ada9508 100644 (file)
--- a/re2c/src/dfa/tagtree.cc
+++ b/re2c/src/dfa/tagtree.cc
@@ -6,7 +6,7 @@
  namespace re2c
  {
  
-tagtree_t::tagtree_t(): nodes(), tail(HROOT), path1(), path2() {}
+tagtree_t::tagtree_t(): nodes(), path1(), path2() {}
  
  tagver_t tagtree_t::elem(hidx_t i) const { return nodes[i].elem; }
  
@@ -14,22 +14,15 @@ hidx_t tagtree_t::pred(hidx_t i) const { return nodes[i].pred; }
  
  size_t tagtree_t::tag(hidx_t i) const { return nodes[i].tag; }
  
-void tagtree_t::push(size_t t, tagver_t v)
+hidx_t tagtree_t::push(hidx_t i, size_t t, tagver_t v)
  {
-       node_t x = {tail, v, t};
+       node_t x = {i, v, t};
         nodes.push_back(x);
-       tail = static_cast<hidx_t>(nodes.size() - 1);
-}
-
-void tagtree_t::pop()
-{
-       // don't destroy the leaf itself, just update pointer to current leaf
-       // (pointer to the the old leaf is stored in one of the closure items)
-       tail = pred(tail);
+       return static_cast<hidx_t>(nodes.size() - 1);
  }
  
  // cut out subhistory of this tag (just skip all other tags)
-static void subhistory(const tagtree_t &history,
+static void full_subhistory(const tagtree_t &history,
         std::vector<tagver_t> &path, hidx_t idx, size_t tag)
  {
         path.clear();
@@ -40,27 +33,18 @@ static void subhistory(const tagtree_t &history,
         }
  }
  
-// cut out a list of subhistories of this tag separated by tags
-// with higher priority (in POSIX they correspond to outer captures)
-static void subhistories(const tagtree_t &history,
+// the last subhistory of this tag: it begins at the first occurrence
+// and ends at the next occurrence of a tag with higher priority (in POSIX
+// they correspond to outer captures) or when the whole history ends
+static void last_subhistory(const tagtree_t &history,
         std::vector<tagver_t> &path, hidx_t idx, size_t tag)
  {
-       // 0 -- bottom, 1 -- cursor, 2 -- subhistory delimiter, so that
-       // short history which is a prefix of a longer history dominates
         path.clear();
-       for (hidx_t i = idx;;) {
-
-               // subhistory begins at the next occurence of this tag
-               for (; i != HROOT && history.tag(i) != tag; i = history.pred(i));
-               if (i == HROOT) break;
-               path.push_back(2);
-
-               // subhistory ends at the next occurence of tag with
-               // higher priority or when the whole history ends
-               for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) {
-                       // skip tags with lower priority
-                       if (history.tag(i) > tag) continue;
-                       path.push_back(history.elem(i) == TAGVER_CURSOR ? 1 : 0);
+       hidx_t i = idx;
+       for (; i != HROOT && history.tag(i) != tag; i = history.pred(i));
+       for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) {
+               if (history.tag(i) == tag) {
+                       path.push_back(history.elem(i));
                 }
         }
  }
@@ -85,17 +69,17 @@ static int32_t compare_reversed(
         return 0;
  }
  
-int32_t tagtree_t::compare_actions(hidx_t x, hidx_t y, size_t t)
+int32_t tagtree_t::compare_full(hidx_t x, hidx_t y, size_t t)
  {
-       subhistory(*this, path1, x, t);
-       subhistory(*this, path2, y, t);
+       full_subhistory(*this, path1, x, t);
+       full_subhistory(*this, path2, y, t);
         return compare_reversed(path1, path2);
  }
  
-int32_t tagtree_t::compare_orbits(hidx_t x, hidx_t y, size_t t)
+int32_t tagtree_t::compare_last(hidx_t x, hidx_t y, size_t t)
  {
-       subhistories(*this, path1, x, t);
-       subhistories(*this, path2, y, t);
+       last_subhistory(*this, path1, x, t);
+       last_subhistory(*this, path2, y, t);
         return compare_reversed(path1, path2);
  }
  
diff --git a/re2c/src/dfa/tagtree.h b/re2c/src/dfa/tagtree.h

index ac997fb0aad43fc7f6eaaaa2966f0b2c3cc78514..3a6881a2fdd4dae867afcd56539db62e6fd54f70 100644 (file)
--- a/re2c/src/dfa/tagtree.h
+++ b/re2c/src/dfa/tagtree.h
@@ -23,7 +23,6 @@ struct tagtree_t
                 size_t tag;
         };
         std::vector<node_t> nodes;
-       hidx_t tail;
  
         // reconstruct paths for comparison
         std::vector<tagver_t> path1;
@@ -33,10 +32,9 @@ struct tagtree_t
         hidx_t pred(hidx_t i) const;
         tagver_t elem(hidx_t i) const;
         size_t tag(hidx_t i) const;
-       void push(size_t t, tagver_t v);
-       void pop();
-       int32_t compare_actions(hidx_t x, hidx_t y, size_t t);
-       int32_t compare_orbits(hidx_t x, hidx_t y, size_t t);
+       hidx_t push(hidx_t i, size_t t, tagver_t v);
+       int32_t compare_full(hidx_t x, hidx_t y, size_t t);
+       int32_t compare_last(hidx_t x, hidx_t y, size_t t);
         tagver_t last(hidx_t i, size_t t) const;
         FORBID_COPY(tagtree_t);
  };
diff --git a/re2c/src/nfa/nfa.h b/re2c/src/nfa/nfa.h

index d6a19489180ced4dd172709e72984f4984567f82..8032f388ebfd222235997cd65f8c9a54a50ca891 100644 (file)
--- a/re2c/src/nfa/nfa.h
+++ b/re2c/src/nfa/nfa.h
@@ -42,7 +42,8 @@ struct nfa_state_t
                 } nil;
         };
         size_t rule;
-       uint8_t loop;
+       uint16_t indeg;
+       uint16_t indeg_backup;
  
         void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2)
         {
@@ -50,7 +51,7 @@ struct nfa_state_t
                 alt.out1 = s1;
                 alt.out2 = s2;
                 rule = r;
-               loop = 0;
+               indeg = indeg_backup = 0;
         }
         void make_ran(size_t r, nfa_state_t *s, const Range *p)
         {
@@ -58,7 +59,7 @@ struct nfa_state_t
                 ran.out = s;
                 ran.ran = p;
                 rule = r;
-               loop = 0;
+               indeg = indeg_backup = 0;
         }
         void make_tag(size_t r, nfa_state_t *s, size_t i, bool bottom)
         {
@@ -67,20 +68,20 @@ struct nfa_state_t
                 tag.info = i;
                 tag.bottom = bottom;
                 rule = r;
-               loop = 0;
+               indeg = indeg_backup = 0;
         }
         void make_fin(size_t r)
         {
                 type = FIN;
                 rule = r;
-               loop = 0;
+               indeg = indeg_backup = 0;
         }
         void make_nil(size_t r, nfa_state_t *s)
         {
                 type = NIL;
                 nil.out = s;
                 rule = r;
-               loop = 0;
+               indeg = indeg_backup = 0;
         }
  };
  
diff --git a/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.c b/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.c

new file mode 100644 (file)

index 0000000..1c959e5
--- /dev/null
+++ b/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.c
@@ -0,0 +1,22 @@
+/* Generated by re2c */
+// test for epsilon closure construction:
+// exponential blowup if paths are not merged
+// as soon as they arrive at the same NFA state
+
+{
+       YYCTYPE yych;
+       yyt1 = YYCURSOR;
+       {
+               const size_t yynmatch = 3;
+               const YYCTYPE *yypmatch[yynmatch * 2];
+               yypmatch[0] = yyt1;
+               yypmatch[3] = yyt1;
+               yypmatch[5] = yyt1;
+               yypmatch[1] = YYCURSOR;
+               yypmatch[2] = yyt1;
+               yypmatch[4] = yyt1;
+               {}
+       }
+}
+
+re2c: warning: line 5: rule matches empty string [-Wmatch-empty-string]
diff --git a/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.re b/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.re

new file mode 100644 (file)

index 0000000..6d77a18
--- /dev/null
+++ b/re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.re
@@ -0,0 +1,6 @@
+// test for epsilon closure construction:
+// exponential blowup if paths are not merged
+// as soon as they arrive at the same NFA state
+/*!re2c
+    ((""){0,100}){0,100} {}
+*/
author	Ulya Trofimovich <skvadrik@gmail.com>
	Tue, 16 May 2017 17:07:00 +0000 (18:07 +0100)
committer	Ulya Trofimovich <skvadrik@gmail.com>
	Tue, 16 May 2017 17:46:08 +0000 (18:46 +0100)
re2c/src/dfa/closure.cc		patch \| blob \| history
re2c/src/dfa/closure.h		patch \| blob \| history
re2c/src/dfa/determinization.cc		patch \| blob \| history
re2c/src/dfa/dump.cc		patch \| blob \| history
re2c/src/dfa/find_state.cc		patch \| blob \| history
re2c/src/dfa/tagtree.cc		patch \| blob \| history
re2c/src/dfa/tagtree.h		patch \| blob \| history
re2c/src/nfa/nfa.h		patch \| blob \| history
re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.c	[new file with mode: 0644]	patch \| blob
re2c/test/posix_captures/exponential_epsilon_closure.i--posix-captures.re	[new file with mode: 0644]	patch \| blob