]> granicus.if.org Git - re2c/commitdiff
Don't split tag history into individual sub-histories for tags.
authorUlya Trofimovich <skvadrik@gmail.com>
Sat, 29 Apr 2017 20:13:50 +0000 (21:13 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Sat, 29 Apr 2017 20:13:50 +0000 (21:13 +0100)
This is necessary for correct comparison of orbit tag histories:
if orbit tag is nested in an outer capture, this outer capture is
under repetition and there is an epsilon-path through it, then
this epsilon-path may contain pieces of orbit history that belong
to different iterations of outer capture; these pieces will be
glued together and the boundary between them will be lost.

Example: ((""){0,3}){0,2}.

However, in a common history we can always find boundaries
(they are marked by tags that correspond to outer captures).

12 files changed:
re2c/src/dfa/closure.cc
re2c/src/dfa/closure.h
re2c/src/dfa/determinization.cc
re2c/src/dfa/dump.cc
re2c/src/dfa/dump.h
re2c/src/dfa/find_state.cc
re2c/src/dfa/find_state.h
re2c/src/dfa/tagpool.cc
re2c/src/dfa/tagtree.cc
re2c/src/dfa/tagtree.h
re2c/src/dfa/tcmd.cc
re2c/src/dfa/tcmd.h

index ae6a758a9d955b0dacf2258eda5bf377f7450964..37e32bd2ce982b570a0a9d998a9539c49a6f4ade 100644 (file)
@@ -24,7 +24,6 @@ tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
        // build tagged epsilon-closure of the given set of NFA states
        clos2.clear();
        if (shadow) shadow->clear();
-       tagpool.history.init();
        for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) {
                closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules);
        }
@@ -94,7 +93,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
                case nfa_state_t::TAG:
                        tagtree.push(n->tag.info, n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
                        closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules);
-                       tagtree.pop(n->tag.info);
+                       tagtree.pop();
                        return;
                case nfa_state_t::RAN:
                        for (; c != e && c->state != n; ++c);
@@ -110,7 +109,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
        }
 
        clos_t c2 = {c0.origin, n, c0.tvers, c0.ttran,
-               tagpool.insert(tagtree.leaves()), c0.order, c0.index++};
+               tagtree.tail, c0.order, c0.index++};
        if (c == e) {
                clos.push_back(c2);
        } else {
@@ -147,9 +146,10 @@ bool better(const clos_t &c1, const clos_t &c2,
                && c1.order == c2.order
                && c1.index == c2.index) return false;
 
+       const hidx_t
+               l1 = c1.tlook, l2 = c2.tlook,
+               t1 = c1.ttran, t2 = c2.ttran;
        const tagver_t
-               *l1 = tagpool[c1.tlook], *l2 = tagpool[c2.tlook],
-               *t1 = tagpool[c1.ttran], *t2 = tagpool[c2.ttran],
                *v1 = tagpool[c1.tvers], *v2 = tagpool[c2.tvers],
                *o1 = tagpool[c1.order], *o2 = tagpool[c2.order];
        tagver_t x, y;
@@ -164,7 +164,7 @@ bool better(const clos_t &c1, const clos_t &c2,
                        if (x < y) return false;
                        if (x > y) return true;
 
-                       const int cmp = tagtree.compare_paths(l1[t], l2[t]);
+                       const int cmp = tagtree.compare_orbits(l1, l2, t);
                        if (cmp < 0) return false;
                        if (cmp > 0) return true;
 
@@ -175,14 +175,14 @@ bool better(const clos_t &c1, const clos_t &c2,
                // we don't use orders for minimize/maximize, because they are
                // already used for leftmost
                } else if (capture(tag)) {
-                       x = tagtree.elem(l1[t]);
-                       y = tagtree.elem(l2[t]);
+                       x = tagtree.last(l1, t);
+                       y = tagtree.last(l2, t);
                        if (x < 0 || y < 0) goto leftmost;
                        if (x > y) return false;
                        if (x < y) return true;
 
-                       x = tagtree.elem(t1[t]);
-                       y = tagtree.elem(t2[t]);
+                       x = tagtree.last(t1, t);
+                       y = tagtree.last(t2, t);
                        if (x < 0 || y < 0) goto leftmost;
                        if (x > y) return false;
                        if (x < y) return true;
@@ -232,7 +232,7 @@ void lower_lookahead_to_transition(closure_t &clos)
 {
        for (clositer_t c = clos.begin(); c != clos.end(); ++c) {
                c->ttran = c->tlook;
-               c->tlook = ZERO_TAGS;
+               c->tlook = HROOT;
        }
 }
 
@@ -252,25 +252,26 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
        // normal transition, however absolute value should be unique
        // among all versions of all tags)
        for (c = b; c != e; ++c) {
-               const tagver_t
-                       *ls = tagpool[c->tlook],
-                       *us = tagpool[c->ttran],
-                       *vs = tagpool[c->tvers];
+               const hidx_t l = c->tlook, h = c->ttran;
+               if (h == HROOT) continue;
+
+               const tagver_t *vs = tagpool[c->tvers];
                for (size_t t = 0; t < ntag; ++t) {
                        const Tag &tag = tags[t];
-                       const tagver_t u = us[t],
-                               u0 = tagtree.elem(u),
-                               l = tagtree.elem(ls[t]);
-                       if (u0 == TAGVER_ZERO) continue;
+                       const tagver_t
+                               h0 = tagtree.last(h, t),
+                               l0 = tagtree.last(l, t);
 
-                       const tagver_t h = history(tag) ? vs[t] : TAGVER_ZERO;
-                       newver_t x = {t, h, u};
+                       if (h0 == TAGVER_ZERO) continue;
+
+                       const tagver_t v = history(tag) ? vs[t] : TAGVER_ZERO;
+                       newver_t x = {t, v, h};
                        const tagver_t
-                               n = (maxver + 1) * (u0 == TAGVER_BOTTOM ? -1 : 1),
+                               n = (maxver + 1) * (h0 == TAGVER_BOTTOM ? -1 : 1),
                                m = newvers.insert(std::make_pair(x, n)).first->second;
                        if (n == m) ++maxver;
 
-                       if (!fixed(tag) && (l == TAGVER_ZERO || history(tag))) {
+                       if (!fixed(tag) && (l0 == TAGVER_ZERO || history(tag))) {
                                newacts.insert(std::make_pair(x, m));
                        }
                }
@@ -278,32 +279,31 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
 
        // actions
        for (newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) {
-               const tagver_t
-                       m = i->second,
-                       h = i->first.ver,
-                       u = i->first.act;
-               if (history(tags[i->first.tag])) {
-                       cmd = tcpool.make_add(cmd, abs(m), abs(h), u, tagtree);
+               const tagver_t m = i->second, v = i->first.base;
+               const hidx_t h = i->first.history;
+               const size_t t = i->first.tag;
+               if (history(tags[t])) {
+                       cmd = tcpool.make_add(cmd, abs(m), abs(v), tagtree, h, t);
                } else {
-                       cmd = tcpool.make_set(cmd, abs(m), tagtree.elem(u));
+                       cmd = tcpool.make_set(cmd, abs(m), tagtree.last(h, t));
                }
        }
 
        // update tag versions in closure
        for (c = b; c != e; ++c) {
-               if (c->ttran == ZERO_TAGS) continue;
-               const tagver_t
-                       *us = tagpool[c->ttran],
-                       *vs = tagpool[c->tvers];
+               const hidx_t h = c->ttran;
+               if (h == HROOT) continue;
+
+               const tagver_t *vs = tagpool[c->tvers];
                for (size_t t = 0; t < ntag; ++t) {
-                       const bool historic = history(tags[t]);
-                       const tagver_t v = vs[t], u = us[t],
-                               u0 = tagtree.elem(u),
-                               h = historic ? v : TAGVER_ZERO;
-                       if (u0 == TAGVER_ZERO) {
-                               vers[t] = v;
+                       const tagver_t
+                               v0 = vs[t],
+                               h0 = tagtree.last(h, t),
+                               v = history(tags[t]) ? v0 : TAGVER_ZERO;
+                       if (h0 == TAGVER_ZERO) {
+                               vers[t] = v0;
                        } else {
-                               newver_t x = {t, h, u};
+                               newver_t x = {t, v, h};
                                vers[t] = newvers[x];
                        }
                }
@@ -352,15 +352,16 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
  * This part of the algorithm was invented by Christopher Kuklewicz.
  */
 
-typedef std::pair<tagver_t, tagver_t> key1_t;
+typedef std::pair<tagver_t, hidx_t> key1_t;
 struct cmp_t
 {
        tagtree_t &tree;
+       size_t tag;
        bool operator()(const key1_t &x, const key1_t &y)
        {
                if (x.first < y.first) return true;
                if (x.first > y.first) return false;
-               return tree.compare_paths(x.second, y.second) < 0;
+               return tree.compare_orbits(x.second, y.second, tag) < 0;
        }
 };
 
@@ -372,12 +373,6 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                ntag = tagpool.ntags,
                nclos = clos.size();
 
-       const cmp_t cmp = {tagtree};
-       std::set<key1_t, cmp_t> keys1(cmp);
-
-       typedef std::pair<tagver_t, size_t> key2_t;
-       std::set<key2_t> keys2;
-
        size_t &maxclos = tagpool.maxclos;
        tagver_t *&orders = tagpool.orders, *o;
        if (maxclos < nclos) {
@@ -391,13 +386,14 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
 
                // see note [POSIX disambiguation]
                if (orbit(tags[t])) {
-                       keys1.clear();
+                       const cmp_t cmp = {tagtree, t};
+                       std::set<key1_t, cmp_t> keys1(cmp);
                        for (c = b; c != e; ++c) {
-                               keys1.insert(key1_t(tagpool[c->order][t], tagpool[c->tlook][t]));
+                               keys1.insert(key1_t(tagpool[c->order][t], c->tlook));
                        }
                        for (c = b; c != e; ++c, o += ntag) {
                                const ptrdiff_t d = std::distance(keys1.begin(),
-                                       keys1.find(key1_t(tagpool[c->order][t], tagpool[c->tlook][t])));
+                                       keys1.find(key1_t(tagpool[c->order][t], c->tlook)));
                                o[t] = static_cast<tagver_t>(d);
                        }
 
@@ -405,6 +401,8 @@ void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
                // equals position of this item in leftmost NFA traversal
                // (it's the same for all tags)
                } else {
+                       typedef std::pair<tagver_t, size_t> key2_t;
+                       std::set<key2_t> keys2;
                        for (c = b; c != e; ++c) {
                                keys2.insert(key2_t(tagpool[c->order][t], c->index));
                        }
index f5744c415537c7a4eaaec0571184896ebde32d1a..e6cd4d115fa492778ad72005123e1892b36d9aac 100644 (file)
@@ -15,8 +15,8 @@ struct clos_t
        nfa_state_t *origin; // for debug only
        nfa_state_t *state;
        size_t tvers; // vector of tag versions (including lookahead tags)
-       size_t ttran; // vector of transition tags
-       size_t tlook; // vector of lookahead tags
+       hidx_t ttran; // history of transition tags
+       hidx_t tlook; // history of lookahead tags
        size_t order; // vector of orders
        size_t index; // leftmost order in NFA traversal
 
@@ -30,8 +30,8 @@ typedef closure_t::const_iterator cclositer_t;
 struct newver_t
 {
        size_t tag;
-       tagver_t ver;
-       tagver_t act;
+       tagver_t base;
+       hidx_t history;
 };
 
 struct newver_cmp_t
@@ -42,10 +42,10 @@ struct newver_cmp_t
                if (x.tag < y.tag) return true;
                if (x.tag > y.tag) return false;
 
-               if (x.ver < y.ver) return true;
-               if (x.ver > y.ver) return false;
+               if (x.base < y.base) return true;
+               if (x.base > y.base) return false;
 
-               return history.compare_paths(x.act, y.act) < 0;
+               return history.compare_actions(x.history, y.history, x.tag) < 0;
        }
 };
 
index 34027029277a517c67e3ceb0b60eabb6d6cc3cdf..868603ca2123ccdd679d0bd61c1cd8aa4a40f713 100644 (file)
@@ -42,7 +42,7 @@ void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
                        *s2 = transition(s1, symbol);
                if (s2) {
                        clos_t c = {s1, s2, kernel->tvers[i], kernel->tlook[i],
-                               ZERO_TAGS, kernel->order[i], 0};
+                               HROOT, kernel->order[i], 0};
                        clos.push_back(c);
                }
        }
@@ -86,7 +86,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        // build tagged epsilon-closure of all reachable NFA states,
        // then find identical or mappable DFA state or add a new one
 
-       clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS, ZERO_TAGS, ZERO_TAGS, 0};
+       clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, HROOT, HROOT, ZERO_TAGS, 0};
        clos1.push_back(c0);
        acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
        find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump);
index 4d2fe704764322563b1a3312bc834f46b5607116..e7a43a4bcbe1d5f65321f29ee45af550dd2db054 100644 (file)
@@ -7,7 +7,7 @@ namespace re2c
 
 static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sym, const tcpool_t &tcpool);
 static const char *tagname(const Tag &t);
-static void dump_tags(const Tagpool &tagpool, size_t ttran, size_t tvers);
+static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers);
 
 dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg)
        : debug(dbg)
@@ -39,35 +39,44 @@ uint32_t dump_dfa_t::index(const nfa_state_t *s)
        return static_cast<uint32_t>(s - base);
 }
 
-void dump_dfa_t::closure_tags(cclositer_t c,
-       const tagver_t *lookahead, bool shadowed)
+static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
+{
+       if (i == HROOT) {
+               fprintf(stderr, " /");
+               return;
+       }
+
+       dump_history(dfa, h, h.pred(i));
+
+       const Tag &t = dfa.tags[h.tag(i)];
+       const tagver_t v = h.elem(i);
+       if (v == TAGVER_BOTTOM) fprintf(stderr, "<O>");
+       if (capture(t)) {
+               fprintf(stderr, "%u_", (uint32_t)t.ncap);
+       } else if (trailing(t)) {
+               fprintf(stderr, "*");
+       } else {
+               fprintf(stderr, "%s", t.name->c_str());
+       }
+       if (v == TAGVER_BOTTOM) fprintf(stderr, "</O>");
+       fprintf(stderr, " ");
+}
+
+void dump_dfa_t::closure_tags(cclositer_t c)
 {
        if (!debug) return;
        if (c->tvers == ZERO_TAGS) return;
 
-       const tagver_t
-               *look = tagpool[c->tlook],
-               *vers = tagpool[c->tvers],
-               *ord =  tagpool[c->order];
+       const hidx_t l = c->tlook;
+       const tagver_t *vers = tagpool[c->tvers];
        const size_t ntag = tagpool.ntags;
+
        for (size_t t = 0; t < ntag; ++t) {
-               const Tag &tag = dfa.tags[t];
-
-               fprintf(stderr, " %s", tagname(tag));
-               fprintf(stderr, "%d", abs(vers[t]));
-               if (lookahead[t]) {
-                       const tagver_t l = tagpool.history.elem(look[t]);
-                       if (l == TAGVER_BOTTOM) {
-                               fprintf(stderr, " &darr;");
-                       } else if (l == TAGVER_CURSOR) {
-                               fprintf(stderr, " &uarr;");
-                       } else {
-                               fprintf(stderr, "  ");
-                       }
-               }
-               if (!shadowed && capture(tag)) {
-                       fprintf(stderr, "[%d]", ord[t]);
-               }
+               fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
+       }
+
+       if (l != HROOT) {
+               dump_history(dfa, tagpool.history, l);
        }
 }
 
@@ -86,17 +95,10 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
                " CELLBORDER=\"1\""
                ">", isnew ? "" : "i", state);
 
-       tagver_t *look = tagpool.buffer;
-       for (size_t t = 0; t < tagpool.ntags; ++t) {
-               for (c = c1; c != c2 && tagpool.history.elem(tagpool[c->tlook][t]) == TAGVER_ZERO; ++c);
-               for (s = s1; s != s2 && tagpool.history.elem(tagpool[s->tlook][t]) == TAGVER_ZERO; ++s);
-               look[t] = c != c2 || s != s2;
-       }
-
        for (s = s1; s != s2; ++s) {
                fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"_%u_%ld\"%s%s><FONT%s>%u",
                        index(s->state), s - s1, color, style, color, index(s->state));
-               closure_tags(s, look, true);
+               closure_tags(s);
                fprintf(stderr, "</FONT></TD></TR>");
        }
        if (!shadow->empty()) {
@@ -105,7 +107,7 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
        for (c = c1; c != c2; ++c) {
                fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
                        index(c->state), style, index(c->state));
-               closure_tags(c, look, true);
+               closure_tags(c);
                fprintf(stderr, "</TD></TR>");
        }
        fprintf(stderr, "</TABLE>>]\n");
@@ -285,20 +287,21 @@ const char *tagname(const Tag &t)
        return t.name ? t.name->c_str() : "";
 }
 
-void dump_tags(const Tagpool &tagpool, size_t ttran, size_t tvers)
+void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers)
 {
-       if (ttran == ZERO_TAGS) return;
+       if (ttran == HROOT) return;
+
+       const tagver_t *vers = tagpool[tvers];
+       const tagtree_t &h = tagpool.history;
 
        fprintf(stderr, "/");
-       const tagver_t
-               *tran = tagpool[ttran],
-               *vers = tagpool[tvers];
        for (size_t i = 0; i < tagpool.ntags; ++i) {
-               tagver_t v = vers[i], t = tran[i];
-               if (tagpool.history.elem(t) == TAGVER_ZERO) continue;
-               fprintf(stderr, "%d", abs(v));
-               for (; t != -1; t = tagpool.history.pred(t)) {
-                       if (tagpool.history.elem(t) < TAGVER_ZERO) {
+               if (h.last(ttran, i) == TAGVER_ZERO) continue;
+
+               fprintf(stderr, "%d", abs(vers[i]));
+               for (hidx_t t = ttran; t != HROOT; t = h.pred(t)) {
+                       if (h.tag(t) != i) continue;
+                       if (h.elem(t) < TAGVER_ZERO) {
                                fprintf(stderr, "&darr;");
                        } else if (t > TAGVER_ZERO) {
                                fprintf(stderr, "&uarr;");
index d323bb156e57f714a2e9920ea0d7ec6129b946dd..4043ffe863055050c8a9228c7aa6c12ab800df42 100644 (file)
@@ -18,7 +18,7 @@ struct dump_dfa_t
 
        dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n, bool dbg);
        ~dump_dfa_t();
-       void closure_tags(cclositer_t c, const tagver_t *lookahead, bool shadowed);
+       void closure_tags(cclositer_t c);
        void closure(const closure_t &clos, uint32_t state, bool isnew);
        void state0(const closure_t &clos);
        void state(const closure_t &clos, size_t state, size_t symbol, bool isnew);
index f2d9d0bab372f023b41263a96723cf2f3b9d8daa..f5966e7e4d4a90afd172a7559cc18baf3d6ab2b5 100644 (file)
@@ -10,7 +10,7 @@ kernel_t::kernel_t(size_t n)
        : size(n)
        , state(new nfa_state_t*[size])
        , tvers(new size_t[size])
-       , tlook(new size_t[size])
+       , tlook(new hidx_t[size])
        , order(new size_t[size])
 {}
 
@@ -20,7 +20,7 @@ kernel_t *kernel_t::copy(const kernel_t &k)
        kernel_t *kcopy = new kernel_t(n);
        memcpy(kcopy->state, k.state, n * sizeof(void*));
        memcpy(kcopy->tvers, k.tvers, n * sizeof(size_t));
-       memcpy(kcopy->tlook, k.tlook, n * sizeof(size_t));
+       memcpy(kcopy->tlook, k.tlook, n * sizeof(hidx_t));
        memcpy(kcopy->order, k.order, n * sizeof(size_t));
        return kcopy;
 }
@@ -36,22 +36,19 @@ kernel_t::~kernel_t()
 static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
        Tagpool &tagpool, const std::vector<Tag> &tags)
 {
-       if (memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0) {
+       if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
                return true;
        }
        tagtree_t &h = tagpool.history;
        for (size_t i = 0; i < x->size; ++i) {
-               const tagver_t
-                       *xls = tagpool[x->tlook[i]],
-                       *yls = tagpool[y->tlook[i]];
+               const hidx_t xl = x->tlook[i], yl = y->tlook[i];
                for (size_t t = 0; t < tagpool.ntags; ++t) {
-                       const tagver_t xl = xls[t], yl = yls[t];
                        if (history(tags[t])) {
-                               // compare whole histories
-                               if (h.compare_paths(xl, yl) != 0) return false;
+                               // compare subhistories
+                               if (h.compare_actions(xl, yl, t) != 0) return false;
                        } else {
                                // compare only the last tags
-                               if (h.elem(xl) != h.elem(yl)) return false;
+                               if (h.last(xl, t) != h.last(yl, t)) return false;
                        }
                }
        }
@@ -127,12 +124,12 @@ bool kernels_t::operator()(const kernel_t *k1, const kernel_t *k2)
        for (size_t i = 0; i < k1->size; ++i) {
                const tagver_t
                        *xv = tagpool[k1->tvers[i]],
-                       *yv = tagpool[k2->tvers[i]],
-                       *xl = tagpool[k1->tlook[i]];
+                       *yv = tagpool[k2->tvers[i]];
+               const hidx_t xl = k1->tlook[i];
 
                for (size_t t = 0; t < ntag; ++t) {
                        // see note [mapping ignores items with lookahead tags]
-                       if (tagpool.history.elem(xl[t]) != TAGVER_ZERO
+                       if (tagpool.history.last(xl, t) != TAGVER_ZERO
                                && !history(tags[t])) continue;
 
                        const tagver_t x = xv[t], y = yv[t];
@@ -347,19 +344,18 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
 {
        tcpool_t &tcpool = dfa.tcpool;
        const Rule &rule = dfa.rules[ridx];
-       const tagver_t
-               *look = tagpool[clos.tlook],
-               *vers = tagpool[clos.tvers];
+       const tagver_t *vers = tagpool[clos.tvers];
+       const tagtree_t &hist = tagpool.history;
+       const hidx_t look = clos.tlook;
        tcmd_t *copy = NULL, *save = NULL, **p;
 
        for (size_t t = rule.ltag; t < rule.htag; ++t) {
-               const bool historic = history(tags[t]);
-               const tagver_t v = abs(vers[t]),
-                       l = tagpool.history.elem(look[t]);
+               const Tag &tag = tags[t];
+               const tagver_t v = abs(vers[t]), l = hist.last(look, t);
                tagver_t &f = dfa.finvers[t];
 
                // don't waste versions on fixed tags
-               if (fixed(dfa.tags[t])) continue;
+               if (fixed(tag)) continue;
 
                // pick a fresh version: final version is also used as fallback one
                if (f == TAGVER_ZERO) {
@@ -368,8 +364,8 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
 
                if (l == TAGVER_ZERO) {
                        copy = tcpool.make_copy(copy, f, v);
-               } else if (historic) {
-                       save = tcpool.make_add(save, f, v, look[t], tagpool.history);
+               } else if (history(tag)) {
+                       save = tcpool.make_add(save, f, v, hist, look, t);
                } else {
                        save = tcpool.make_set(save, f, l);
                }
index 17be9a0f9bab7f2fa3b612bef0879cf22f30eb9b..d5a6d13a0f5af4eea8194e2a33f522b148ace394 100644 (file)
@@ -14,7 +14,7 @@ struct kernel_t
        size_t size;
        nfa_state_t **state;
        size_t *tvers; // tag versions
-       size_t *tlook; // lookahead tags
+       hidx_t *tlook; // lookahead tags
        size_t *order; // see note [orbit order of closure items]
 
        explicit kernel_t(size_t n);
index f0adc14bbc5a71e21526523cd484fd811fe0d0e2..db121306a11d082a178f395e92a299dad4122007 100644 (file)
@@ -24,7 +24,7 @@ Tagpool::Tagpool(size_t n)
        , buffer(new tagver_t[n])
        , maxclos(0)
        , orders(NULL)
-       , history(n)
+       , history()
 {}
 
 Tagpool::~Tagpool()
index faf2946dc52edda6a105be8d63766a8f29e4af25..471ca4ceb784f0a37905260264cbddfbec261e2d 100644 (file)
@@ -6,68 +6,72 @@
 namespace re2c
 {
 
-tagtree_t::tagtree_t(size_t n)
-       : nodes()
-       , path1()
-       , path2()
-       , ntag(n)
-       , tags(new tagver_t[ntag])
-{
-       node_t x = {-1, TAGVER_ZERO};
-       nodes.push_back(x);
-       init();
-}
-
-tagtree_t::~tagtree_t()
-{
-       delete[] tags;
-}
-
-void tagtree_t::init()
-{
-       memset(tags, 0, ntag * sizeof(tagver_t));
-}
+tagtree_t::tagtree_t(): nodes(), tail(HROOT), path1(), path2() {}
 
-tagver_t tagtree_t::elem(tagver_t i) const
-{
-       return nodes[static_cast<size_t>(i)].elem;
-}
+tagver_t tagtree_t::elem(hidx_t i) const { return nodes[i].elem; }
 
-tagver_t tagtree_t::pred(tagver_t i) const
-{
-       return nodes[static_cast<size_t>(i)].pred;
-}
+hidx_t tagtree_t::pred(hidx_t i) const { return nodes[i].pred; }
 
-const tagver_t *tagtree_t::leaves() const
-{
-       return tags;
-}
+size_t tagtree_t::tag(hidx_t i) const { return nodes[i].tag; }
 
 void tagtree_t::push(size_t t, tagver_t v)
 {
-       node_t x = {tags[t], v};
+       node_t x = {tail, v, t};
        nodes.push_back(x);
-       tags[t] = static_cast<tagver_t>(nodes.size() - 1);
+       tail = static_cast<hidx_t>(nodes.size() - 1);
 }
 
-void tagtree_t::pop(size_t t)
+void tagtree_t::pop()
 {
        // don't destroy the leaf itself, just update pointer to current leaf
        // (pointer to the the old leaf is stored in one of the closure items)
-       tags[t] = pred(tags[t]);
+       tail = pred(tail);
 }
 
-int32_t tagtree_t::compare_paths(tagver_t x, tagver_t y)
+// cut out subhistory of this tag (just skip all other tags)
+static void subhistory(const tagtree_t &history,
+       std::vector<tagver_t> &path, hidx_t idx, size_t tag)
 {
-       path1.clear();
-       for (; x != -1; x = pred(x)) path1.push_back(elem(x));
+       path.clear();
+       for (hidx_t i = idx; i != HROOT; i = history.pred(i)) {
+               if (history.tag(i) == tag) {
+                       path.push_back(history.elem(i));
+               }
+       }
+}
 
-       path2.clear();
-       for (; y != -1; y = pred(y)) path2.push_back(elem(y));
+// cut out a list of subhistories of this tag separated by tags
+// with higher priority (in POSIX they correspond to outer captures)
+static void subhistories(const tagtree_t &history,
+       std::vector<tagver_t> &path, hidx_t idx, size_t tag)
+{
+       // 0 -- bottom, 1 -- cursor, 2 -- subhistory delimiter, so that
+       // short history which is a prefix of a longer history dominates
+       path.clear();
+       for (hidx_t i = idx;;) {
+
+               // subhistory begins at the next occurrence of this tag
+               for (; i != HROOT && history.tag(i) != tag; i = history.pred(i));
+               if (i == HROOT) break;
+               path.push_back(2);
+
+               // subhistory ends at the next occurrence of tag with
+               // higher priority or when the whole history ends
+               for (; i != HROOT && history.tag(i) >= tag; i = history.pred(i)) {
+                       // skip tags with lower priority
+                       if (history.tag(i) > tag) continue;
+                       path.push_back(history.elem(i) == TAGVER_CURSOR ? 1 : 0);
+               }
+       }
+}
 
+static int32_t compare_reversed(
+       const std::vector<tagver_t> &h1,
+       const std::vector<tagver_t> &h2)
+{
        std::vector<tagver_t>::const_reverse_iterator
-               i1 = path1.rbegin(), e1 = path1.rend(),
-               i2 = path2.rbegin(), e2 = path2.rend();
+               i1 = h1.rbegin(), e1 = h1.rend(),
+               i2 = h2.rbegin(), e2 = h2.rend();
 
        for (;;) {
                if (i1 == e1 && i2 == e2) break;
@@ -81,4 +85,26 @@ int32_t tagtree_t::compare_paths(tagver_t x, tagver_t y)
        return 0;
 }
 
+int32_t tagtree_t::compare_actions(hidx_t x, hidx_t y, size_t t)
+{
+       subhistory(*this, path1, x, t);
+       subhistory(*this, path2, y, t);
+       return compare_reversed(path1, path2);
+}
+
+int32_t tagtree_t::compare_orbits(hidx_t x, hidx_t y, size_t t)
+{
+       subhistories(*this, path1, x, t);
+       subhistories(*this, path2, y, t);
+       return compare_reversed(path1, path2);
+}
+
+tagver_t tagtree_t::last(hidx_t i, size_t t) const
+{
+       for (; i != HROOT; i = pred(i)) {
+               if (tag(i) == t) return elem(i);
+       }
+       return TAGVER_ZERO;
+}
+
 } // namespace re2c
index fe4add97d34910b5fa6d33fd57e31d4c9a093d2a..ac997fb0aad43fc7f6eaaaa2966f0b2c3cc78514 100644 (file)
@@ -9,35 +9,35 @@
 namespace re2c
 {
 
-class tagtree_t
+typedef uint32_t hidx_t;
+
+static const hidx_t HROOT = ~0u;
+
+struct tagtree_t
 {
        // the whole tree of tags found by the epsilon-closure
        // (a bunch of separate subtrees for each tag with common root)
        struct node_t {
-               tagver_t pred;
+               hidx_t pred;
                tagver_t elem;
+               size_t tag;
        };
        std::vector<node_t> nodes;
+       hidx_t tail;
 
        // reconstruct paths for comparison
        std::vector<tagver_t> path1;
        std::vector<tagver_t> path2;
 
-       // set of leaves (one leaf per tag) corresponding to
-       // current deep-first search path in the epsilon-closure
-       size_t ntag;
-       tagver_t *tags;
-
-public:
-       explicit tagtree_t(size_t n);
-       ~tagtree_t();
-       void init();
-       tagver_t pred(tagver_t i) const;
-       tagver_t elem(tagver_t i) const;
-       const tagver_t *leaves() const;
+       tagtree_t();
+       hidx_t pred(hidx_t i) const;
+       tagver_t elem(hidx_t i) const;
+       size_t tag(hidx_t i) const;
        void push(size_t t, tagver_t v);
-       void pop(size_t t);
-       int32_t compare_paths(tagver_t x, tagver_t y);
+       void pop();
+       int32_t compare_actions(hidx_t x, hidx_t y, size_t t);
+       int32_t compare_orbits(hidx_t x, hidx_t y, size_t t);
+       tagver_t last(hidx_t i, size_t t) const;
        FORBID_COPY(tagtree_t);
 };
 
index 5f11749002108078da3b25dacc80f3e280b374e8..36079270c86c484200f71ee850597ec21b8cc50c 100644 (file)
@@ -143,19 +143,25 @@ tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set)
 }
 
 tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs,
-       tagver_t hidx, const tagtree_t &history)
+       const tagtree_t &history, hidx_t hidx, size_t tag)
 {
        size_t hlen = 0;
-       for (tagver_t i = hidx; i != -1; i = history.pred(i)) ++hlen;
+       for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
+               if (history.tag(i) == tag) ++hlen;
+       }
 
-       const size_t size = sizeof(tcmd_t) + (hlen - 1) * sizeof(tagver_t);
+       const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t);
        tcmd_t *p = static_cast<tcmd_t*>(alc.alloc(size));
        p->next = next;
        p->lhs = lhs;
        p->rhs = rhs;
-       for (tagver_t i = hidx, *h = p->history; i != -1; i = history.pred(i)) {
-               *h++ = history.elem(i);
+       tagver_t *h = p->history;
+       for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) {
+               if (history.tag(i) == tag) {
+                       *h++ = history.elem(i);
+               }
        }
+       *h++ = TAGVER_ZERO;
        return p;
 }
 
index 4dde594f433b38dc1c727590b2ebd7f1a217990c..d48890bbee7d8b63d191009c62a49d00446820fa 100644 (file)
@@ -43,7 +43,7 @@ public:
        tcpool_t();
        tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs);
        tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set);
-       tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, tagver_t hidx, const tagtree_t &history);
+       tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagtree_t &history, hidx_t hidx, size_t tag);
        tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history);
        tcid_t insert(const tcmd_t *tcmd);
        const tcmd_t *operator[](tcid_t id) const;