granicus.if.org Git - re2c/commitdiff
Keep tag histories for the whole time of determinization.
author Ulya Trofimovich <skvadrik@gmail.com>
Mon, 10 Apr 2017 21:35:15 +0000 (22:35 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Tue, 11 Apr 2017 09:45:19 +0000 (10:45 +0100)
re2c/src/dfa/closure.cc
re2c/src/dfa/closure.h
re2c/src/dfa/determinization.cc
re2c/src/dfa/dump.cc
re2c/src/dfa/find_state.cc
re2c/src/dfa/tagpool.cc
re2c/src/dfa/tagpool.h
re2c/src/dfa/tagtree.cc

index 3a7feaac48b26d2832cee3e4bf1bb9d2a4517054..de1eb447d5e18cdc0024d4a6217fbc14cc520167 100644 (file)
@@ -7,28 +7,29 @@
 namespace re2c
 {
 
-static void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree,
-       clos_t &c0, nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow, std::valarray<Rule> &rules);
-static bool better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, tagtree_t &tagtree, const std::vector<Tag> &tags);
+static void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
+       nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow, std::valarray<Rule> &rules);
+static bool better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, const std::vector<Tag> &tags);
 static void lower_lookahead_to_transition(closure_t &clos);
-static tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags, Tagpool &tagpool, tcpool_t &tcpool, tagver_t &maxver, newvers_t &newvers);
-static void orders(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, const std::vector<Tag> &tags);
+static tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
+       Tagpool &tagpool, tcpool_t &tcpool, tagver_t &maxver, newvers_t &newvers);
+static void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags);
 static bool cmpby_rule_state(const clos_t &x, const clos_t &y);
 
 tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
-       tcpool_t &tcpool, tagtree_t &tagtree, std::valarray<Rule> &rules,
-       tagver_t &maxver, newvers_t &newvers, bool lookahead, closure_t *shadow,
+       tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver,
+       newvers_t &newvers, bool lookahead, closure_t *shadow,
        const std::vector<Tag> &tags)
 {
        // build tagged epsilon-closure of the given set of NFA states
        clos2.clear();
        if (shadow) shadow->clear();
-       tagtree.init();
+       tagpool.history.init();
        for (clositer_t c = clos1.begin(); c != clos1.end(); ++c) {
-               closure_one(clos2, tagpool, tagtree, *c, c->state, tags, shadow, rules);
+               closure_one(clos2, tagpool, *c, c->state, tags, shadow, rules);
        }
 
-       orders(clos2, tagpool, tagtree, tags);
+       orders(clos2, tagpool, tags);
 
        std::sort(clos2.begin(), clos2.end(), cmpby_rule_state);
 
@@ -73,25 +74,26 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y)
  * to leftmost strategy; orbit tags are compared by order and by tagged
  * epsilon-paths so that earlier iterations are maximized).
  */
-void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, clos_t &c0,
+void closure_one(closure_t &clos, Tagpool &tagpool, clos_t &c0,
        nfa_state_t *n, const std::vector<Tag> &tags, closure_t *shadow,
        std::valarray<Rule> &rules)
 {
        if (n->loop) return;
        local_increment_t<uint8_t> _(n->loop);
 
+       tagtree_t &tagtree = tagpool.history;
        clositer_t c = clos.begin(), e = clos.end();
        switch (n->type) {
                case nfa_state_t::NIL:
-                       closure_one(clos, tagpool, tagtree, c0, n->nil.out, tags, shadow, rules);
+                       closure_one(clos, tagpool, c0, n->nil.out, tags, shadow, rules);
                        return;
                case nfa_state_t::ALT:
-                       closure_one(clos, tagpool, tagtree, c0, n->alt.out1, tags, shadow, rules);
-                       closure_one(clos, tagpool, tagtree, c0, n->alt.out2, tags, shadow, rules);
+                       closure_one(clos, tagpool, c0, n->alt.out1, tags, shadow, rules);
+                       closure_one(clos, tagpool, c0, n->alt.out2, tags, shadow, rules);
                        return;
                case nfa_state_t::TAG:
                        tagtree.push(n->tag.info, n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
-                       closure_one(clos, tagpool, tagtree, c0, n->tag.out, tags, shadow, rules);
+                       closure_one(clos, tagpool, c0, n->tag.out, tags, shadow, rules);
                        tagtree.pop(n->tag.info);
                        return;
                case nfa_state_t::RAN:
@@ -113,7 +115,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, clos_t &
                clos.push_back(c2);
        } else {
                clos_t &c1 = *c;
-               if (better(c1, c2, tagpool, tagtree, tags)) std::swap(c1, c2);
+               if (better(c1, c2, tagpool, tags)) std::swap(c1, c2);
                if (shadow) shadow->push_back(c2);
        }
 }
@@ -137,7 +139,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool, tagtree_t &tagtree, clos_t &
  */
 
 bool better(const clos_t &c1, const clos_t &c2,
-       Tagpool &tagpool, tagtree_t &tagtree, const std::vector<Tag> &tags)
+       Tagpool &tagpool, const std::vector<Tag> &tags)
 {
        if (c1.ttran == c2.ttran
                && c1.tvers == c2.tvers
@@ -151,6 +153,7 @@ bool better(const clos_t &c1, const clos_t &c2,
                *v1 = tagpool[c1.tvers], *v2 = tagpool[c2.tvers],
                *o1 = tagpool[c1.order], *o2 = tagpool[c2.order];
        tagver_t x, y;
+       tagtree_t &tagtree = tagpool.history;
 
        for (size_t t = 0; t < tagpool.ntags; ++t) {
                const Tag &tag = tags[t];
@@ -176,7 +179,8 @@ bool better(const clos_t &c1, const clos_t &c2,
                        if (x > y) return false;
                        if (x < y) return true;
 
-                       x = t1[t]; y = t2[t];
+                       x = tagtree.elem(t1[t]);
+                       y = tagtree.elem(t2[t]);
                        if (x < 0 || y < 0) goto leftmost;
                        if (x > y) return false;
                        if (x < y) return true;
@@ -236,6 +240,7 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
        tcmd_t *cmd = NULL, *p;
        const size_t ntag = tagpool.ntags;
        tagver_t *vers = tagpool.buffer;
+       tagtree_t &tagtree = tagpool.history;
        clositer_t b = clos.begin(), e = clos.end(), c;
 
        // for each tag, if there is at least one tagged transition,
@@ -249,7 +254,9 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
                        *vs = tagpool[c->tvers];
                for (size_t t = 0; t < ntag; ++t) {
                        const Tag &tag = tags[t];
-                       const tagver_t u = us[t];
+                       const tagver_t
+                               u = tagtree.elem(us[t]),
+                               l = tagtree.elem(ls[t]);
                        if (u == TAGVER_ZERO) continue;
 
                        const tagver_t h = history(tag) ? vs[t] : TAGVER_ZERO;
@@ -260,7 +267,7 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
                        if (n == m) ++maxver;
 
                        // add action unless already have an identical one
-                       if (fixed(tag) || (ls[t] && !history(tag))) continue;
+                       if (fixed(tag) || (l && !history(tag))) continue;
                        for (p = cmd; p; p = p->next) {
                                if (p->lhs == abs(m) && p->rhs == u && p->pred == abs(h)) break;
                        }
@@ -275,7 +282,8 @@ tcmd_t *generate_versions(closure_t &clos, const std::vector<Tag> &tags,
                        *us = tagpool[c->ttran],
                        *vs = tagpool[c->tvers];
                for (size_t t = 0; t < ntag; ++t) {
-                       const tagver_t u = us[t], v = vs[t],
+                       const tagver_t v = vs[t],
+                               u = tagtree.elem(us[t]),
                                h = history(tags[t]) ? v : TAGVER_ZERO;
                        if (u == TAGVER_ZERO) {
                                vers[t] = v;
@@ -341,9 +349,9 @@ struct cmp_t
        }
 };
 
-void orders(closure_t &clos, Tagpool &tagpool,
-       tagtree_t &tagtree, const std::vector<Tag> &tags)
+void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
 {
+       tagtree_t &tagtree = tagpool.history;
        clositer_t b = clos.begin(), e = clos.end(), c;
        const size_t
                ntag = tagpool.ntags,
@@ -397,17 +405,6 @@ void orders(closure_t &clos, Tagpool &tagpool,
        for (c = b; c != e; ++c, o += ntag) {
                c->order = tagpool.insert(o);
        }
-
-       // flatten lookahead tags (take the last on each path)
-       tagver_t *look = tagpool.buffer;
-       for (clositer_t c = clos.begin(); c != clos.end(); ++c) {
-               if (c->tlook == ZERO_TAGS) continue;
-               const tagver_t *oldl = tagpool[c->tlook];
-               for (size_t t = 0; t < ntag; ++t) {
-                       look[t] = tagtree.elem(oldl[t]);
-               }
-               c->tlook = tagpool.insert(look);
-       }
 }
 
 } // namespace re2c
index ff5be7e3b8704d1956bf649f215da123554c9202..cbf0c93430048eee68d7f96d6db378e4ae25b908 100644 (file)
@@ -5,7 +5,6 @@
 #include <map>
 
 #include "src/dfa/dfa.h"
-#include "src/dfa/tagtree.h"
 #include "src/nfa/nfa.h"
 
 namespace re2c
@@ -52,9 +51,8 @@ inline bool operator<(const newver_t &x, const newver_t &y)
 typedef std::map<newver_t, tagver_t> newvers_t;
 
 tcmd_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool,
-       tcpool_t &tcpool, tagtree_t &tagtree, std::valarray<Rule> &rules,
-       tagver_t &maxver, newvers_t &newvers, bool lookahead, closure_t *shadow,
-       const std::vector<Tag> &tags);
+       tcpool_t &tcpool, std::valarray<Rule> &rules, tagver_t &maxver,
+       newvers_t &newvers, bool lookahead, closure_t *shadow, const std::vector<Tag> &tags);
 
 } // namespace re2c
 
index 733bfcfd5c651ebc5ac826fbb7d7936dd9ac8252..dba1bcd6feb9f589ae9f460b2df32f8daf7c18e1 100644 (file)
@@ -64,7 +64,6 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        const bool lookahead = opts->lookahead;
        const size_t ntag = tags.size();
        Tagpool tagpool(ntag);
-       tagtree_t tagtree(ntag);
        kernels_t kernels(tagpool, tcpool, tags);
        closure_t clos1, clos2;
        newvers_t newvers;
@@ -88,14 +87,14 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
 
        clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS, ZERO_TAGS, ZERO_TAGS, 0};
        clos1.push_back(c0);
-       acts = closure(clos1, clos2, tagpool, tcpool, tagtree, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
+       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
        find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump);
 
        for (size_t i = 0; i < kernels.size(); ++i) {
                newvers.clear();
                for (size_t c = 0; c < nchars; ++c) {
                        reach(kernels[i], clos1, charset[c]);
-                       acts = closure(clos1, clos2, tagpool, tcpool, tagtree, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
+                       acts = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver, newvers, lookahead, dump.shadow, tags);
                        find_state(*this, i, c, kernels, clos2, acts, dump);
                }
        }
index 73ec7bc72ff06cb26bedcbaeec6c1ad309753f37..dfaa055fd9a2015916080284d5e13f182b1e9b6e 100644 (file)
@@ -57,7 +57,7 @@ void dump_dfa_t::closure_tags(cclositer_t c,
                fprintf(stderr, " %s", tagname(tag));
                fprintf(stderr, "%d", abs(vers[t]));
                if (lookahead[t]) {
-                       const tagver_t l = look[t];
+                       const tagver_t l = tagpool.history.elem(look[t]);
                        if (l == TAGVER_BOTTOM) {
                                fprintf(stderr, " &darr;");
                        } else if (l == TAGVER_CURSOR) {
@@ -89,8 +89,8 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
 
        tagver_t *look = tagpool.buffer;
        for (size_t t = 0; t < tagpool.ntags; ++t) {
-               for (c = c1; c != c2 && tagpool[c->tlook][t] == TAGVER_ZERO; ++c);
-               for (s = s1; s != s2 && tagpool[s->tlook][t] == TAGVER_ZERO; ++s);
+               for (c = c1; c != c2 && tagpool.history.elem(tagpool[c->tlook][t]) == TAGVER_ZERO; ++c);
+               for (s = s1; s != s2 && tagpool.history.elem(tagpool[s->tlook][t]) == TAGVER_ZERO; ++s);
                look[t] = c != c2 || s != s2;
        }
 
@@ -292,7 +292,8 @@ void dump_tags(const Tagpool &tagpool, size_t ttran, size_t tvers)
                *tran = tagpool[ttran],
                *vers = tagpool[tvers];
        for (size_t i = 0; i < tagpool.ntags; ++i) {
-               const tagver_t t = tran[i], v = vers[i];
+               const tagver_t v = vers[i],
+                       t = tagpool.history.elem(tran[i]);
                if (t < TAGVER_ZERO) {
                        fprintf(stderr, "%d&darr; ", -v);
                } else if (t > TAGVER_ZERO) {
index 95a854237435ac89586e2d5c290d1cabe33a0347..589ab580e4c42f38042183243be06da600d096ef 100644 (file)
@@ -33,15 +33,37 @@ kernel_t::~kernel_t()
        delete[] order;
 }
 
+static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
+       const Tagpool &tagpool)
+{
+       if (memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0) {
+               return true;
+       }
+       for (size_t i = 0; i < x->size; ++i) {
+               const tagver_t
+                       *xls = tagpool[x->tlook[i]],
+                       *yls = tagpool[y->tlook[i]];
+               for (size_t t = 0; t < tagpool.ntags; ++t) {
+                       // compare only the last tags
+                       const tagver_t
+                               xl = tagpool.history.elem(xls[t]),
+                               yl = tagpool.history.elem(yls[t]);
+                       if (xl != yl) return false;
+               }
+       }
+       return true;
+}
+
 struct kernel_eq_t
 {
+       Tagpool &tagpool;
        bool operator()(const kernel_t *x, const kernel_t *y) const
        {
                return x->size == y->size
                        && memcmp(x->state, y->state, x->size * sizeof(void*)) == 0
                        && memcmp(x->tvers, y->tvers, x->size * sizeof(size_t)) == 0
-                       && memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0;
-               // no need to compare orders: if versions and lookahead coincide, so do orders
+                       && equal_lookahead_tags(x, y, tagpool);
+               // if versions and lookahead coincide, so do orders
        }
 };
 
@@ -65,8 +87,8 @@ bool kernels_t::operator()(const kernel_t *k1, const kernel_t *k2)
        // check that kernel sizes, NFA states and orders coincide
        const bool compatible = k1->size == k2->size
                && memcmp(k1->state, k2->state, k1->size * sizeof(void*)) == 0
-               && memcmp(k1->tlook, k2->tlook, k1->size * sizeof(size_t)) == 0
-               && memcmp(k1->order, k2->order, k1->size * sizeof(size_t)) == 0;
+               && memcmp(k1->order, k2->order, k1->size * sizeof(size_t)) == 0
+               && equal_lookahead_tags(k1, k2, tagpool);
        if (!compatible) return false;
 
        // map tag versions of one kernel to that of another
@@ -78,11 +100,12 @@ bool kernels_t::operator()(const kernel_t *k1, const kernel_t *k2)
                const tagver_t
                        *xv = tagpool[k1->tvers[i]],
                        *yv = tagpool[k2->tvers[i]],
-                       *xl = tagpool[k2->tlook[i]];
+                       *xl = tagpool[k1->tlook[i]];
 
                for (size_t t = 0; t < ntag; ++t) {
                        // see note [mapping ignores items with lookahead tags]
-                       if (xl[t] != TAGVER_ZERO && !history(tags[t])) continue;
+                       if (tagpool.history.elem(xl[t]) != TAGVER_ZERO
+                               && !history(tags[t])) continue;
 
                        const tagver_t x = xv[t], y = yv[t];
                        tagver_t &x0 = y2x[y], &y0 = x2y[x];
@@ -224,11 +247,10 @@ kernels_t::result_t kernels_t::insert(const closure_t &clos,
        // get kernel hash
        uint32_t hash = static_cast<uint32_t>(nkern); // seed
        hash = hash32(hash, buffer->state, nkern * sizeof(void*));
-       hash = hash32(hash, buffer->tlook, nkern * sizeof(size_t));
        hash = hash32(hash, buffer->order, nkern * sizeof(size_t));
 
        // try to find identical kernel
-       kernel_eq_t eq;
+       kernel_eq_t eq = {tagpool};
        x = lookup.find_with(hash, buffer, eq);
        if (x != index_t::NIL) return result_t(x, acts, false);
 
@@ -308,7 +330,8 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
        tcmd_t *copy = NULL, *save = NULL, **p;
 
        for (size_t t = rule.ltag; t < rule.htag; ++t) {
-               const tagver_t l = look[t], v = abs(vers[t]),
+               const tagver_t v = abs(vers[t]),
+                       l = tagpool.history.elem(look[t]),
                        h = history(tags[t]) ? v : TAGVER_ZERO;
                tagver_t &f = dfa.finvers[t];
 
index 0ac8734ca97be52b1fc740e5c9b5a370f277fb74..f0adc14bbc5a71e21526523cd484fd811fe0d0e2 100644 (file)
@@ -24,6 +24,7 @@ Tagpool::Tagpool(size_t n)
        , buffer(new tagver_t[n])
        , maxclos(0)
        , orders(NULL)
+       , history(n)
 {}
 
 Tagpool::~Tagpool()
index 20ee9a0611a2c36498cea704aca42cc04d5e7fbe..71dcdad1b6dc976b567c8a761e3d4555a0f2e9da 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _RE2C_DFA_TAGPOOL_
 #define _RE2C_DFA_TAGPOOL_
 
+#include "src/dfa/tagtree.h"
 #include "src/re/tag.h"
 #include "src/util/lookup.h"
 #include "src/util/forbid_copy.h"
@@ -23,6 +24,8 @@ public:
        size_t maxclos;
        tagver_t *orders;
 
+       tagtree_t history;
+
        explicit Tagpool(size_t n);
        ~Tagpool();
        size_t insert_const(tagver_t ver);
index ba82175e6bbde06204b41d60ad1e022753f4d116..faf2946dc52edda6a105be8d63766a8f29e4af25 100644 (file)
@@ -13,6 +13,8 @@ tagtree_t::tagtree_t(size_t n)
        , ntag(n)
        , tags(new tagver_t[ntag])
 {
+       node_t x = {-1, TAGVER_ZERO};
+       nodes.push_back(x);
        init();
 }
 
@@ -23,9 +25,6 @@ tagtree_t::~tagtree_t()
 
 void tagtree_t::init()
 {
-       nodes.clear();
-       node_t x = {-1, TAGVER_ZERO};
-       nodes.push_back(x);
        memset(tags, 0, ntag * sizeof(tagver_t));
 }