]> granicus.if.org Git - re2c/commitdiff
More precise liveness analyses for context deduplication.
authorUlya Trofimovich <skvadrik@gmail.com>
Sun, 1 May 2016 08:18:07 +0000 (09:18 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Sun, 1 May 2016 08:18:07 +0000 (09:18 +0100)
Liveness analyses should exclude contexts that are updated before
they are used past certain point in the program.

21 files changed:
re2c/Makefile.am
re2c/src/codegen/emit_action.cc
re2c/src/codegen/input_api.cc
re2c/src/codegen/input_api.h
re2c/src/ir/adfa/adfa.cc
re2c/src/ir/adfa/adfa.h
re2c/src/ir/dfa/context_deduplication.cc
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/dfa.h
re2c/src/ir/dfa/minimization.cc
re2c/src/ir/nfa/nfa.cc
re2c/src/ir/rule.cc
re2c/src/ir/rule.h
re2c/src/ir/skeleton/path.h
re2c/src/ir/skeleton/skeleton.cc
re2c/src/ir/skeleton/skeleton.h
re2c/src/ir/tagpool.h [new file with mode: 0644]
re2c/test/contexts/dedup0.i--input(custom).c
re2c/test/contexts/dedup0.i.c
re2c/test/contexts/dedup2.i--input(custom).c
re2c/test/contexts/dedup2.i.c

index a5f472c5481df06d6815bcb8e18c6991469a97ae..621f6f0e417c70e8b264a3f4487550f2d0376700 100644 (file)
@@ -38,6 +38,7 @@ SRC_HDR = \
        src/ir/compile.h \
        src/ir/ctx.h \
        src/ir/rule.h \
+       src/ir/tagpool.h \
        src/ir/skeleton/path.h \
        src/ir/skeleton/skeleton.h \
        src/globals.h \
index 632ee8f3e0a3ca2d78f50c37b23a104b4b0fdd6e..8c32608e1e93faf9b6aecd0a1630043c375d640d 100644 (file)
@@ -54,9 +54,9 @@ void emit_action(OutputFile &o, uint32_t ind, bool &readCh,
                        emit_rule(o, ind, dfa, s->action.info.rule);
                        break;
        }
-       if (!s->ctxs.empty()) {
+       if (s->tags != 0) {
                if (dfa.base_ctxmarker) {
-                       o.wstring(opts->input_api.stmt_dist(ind, s->ctxs, dfa.contexts));
+                       o.wstring(opts->input_api.stmt_dist(ind, dfa.tagpool[s->tags], dfa.contexts));
                } else {
                        o.wstring(opts->input_api.stmt_backupctx(ind));
                }
index 4f0f010aea0a6a1204cb591f56534dbc1c58e086..659c4d43a8842ab2f4a5c3d1ffa11501301577e0 100644 (file)
@@ -107,12 +107,14 @@ std::string InputAPI::expr_dist () const
        return s;
 }
 
-std::string InputAPI::stmt_dist (uint32_t ind, const std::set<size_t> &ctxs,
+std::string InputAPI::stmt_dist (uint32_t ind, const bool *tags,
                const std::vector<CtxVar> &contexts) const
 {
        std::string s = indent(ind);
-       for (std::set<size_t>::const_iterator i = ctxs.begin(); i != ctxs.end(); ++i) {
-               s += contexts[*i].expr() + " = ";
+       for (size_t i = 0; i < contexts.size(); ++i) {
+               if (tags[i]) {
+                       s += contexts[i].expr() + " = ";
+               }
        }
        return s + expr_dist() + ";\n";
 }
index b575e57c6003bed428a41462d8b4823ee0e36db0..4ef77a0f151f9a14879e46414003e2e5dba26fab 100644 (file)
@@ -33,7 +33,7 @@ public:
        std::string stmt_backup (uint32_t ind) const;
        std::string stmt_backupctx (uint32_t ind) const;
        std::string expr_dist () const;
-       std::string stmt_dist (uint32_t ind, const std::set<size_t> &ctxs,
+       std::string stmt_dist (uint32_t ind, const bool *tags,
                const std::vector<CtxVar> &contexts) const;
        std::string expr_ctx (const std::string &ctx) const;
        std::string expr_ctx_fix (const CtxFix &ctx, const std::vector<CtxVar> &ctxvars) const;
index 8ab2539aa9a1f3b92200032f63b11e7cb3c443a0..0678ce2474c7e9a7ef2ed0e56b6e2b1627f61cd0 100644 (file)
@@ -37,6 +37,7 @@ DFA::DFA
        , head(NULL)
        , rules(dfa.rules)
        , contexts(dfa.contexts)
+       , tagpool(dfa.tagpool)
 
        // statistics
        , max_fill (0)
@@ -78,7 +79,7 @@ DFA::DFA
                *p = s;
                p = &s->next;
 
-               s->ctxs = t->ctxs;
+               s->tags = t->tags;
                s->rule = t->rule;
                s->fill = fill[i];
                s->go.span = allocate<Span>(nchars);
@@ -112,9 +113,9 @@ DFA::~DFA()
        }
 
        delete skeleton;
-
-       delete &contexts;
        delete &rules;
+       delete &contexts;
+       delete &tagpool;
 }
 
 /* note [reordering DFA states]
index b19b1face3b6c410f413fdb755e89d73a374097d..693dcb24f4c9b3dc4fef25705d39ede6f3879508 100644 (file)
@@ -12,6 +12,7 @@
 #include "src/ir/adfa/action.h"
 #include "src/ir/regexp/regexp.h"
 #include "src/ir/rule.h"
+#include "src/ir/tagpool.h"
 #include "src/util/forbid_copy.h"
 
 namespace re2c
@@ -30,7 +31,7 @@ struct State
        bool fallback;
 
        size_t rule;
-       std::set<size_t> ctxs;
+       size_t tags;
        bool isBase;
        Go go;
        Action action;
@@ -41,7 +42,7 @@ struct State
                , fill (0)
                , fallback (false)
                , rule (Rule::NONE)
-               , ctxs ()
+               , tags (0)
                , isBase (false)
                , go ()
                , action ()
@@ -67,6 +68,7 @@ struct DFA
        State * head;
        std::valarray<Rule> &rules;
        std::vector<CtxVar> &contexts;
+       Tagpool &tagpool;
        size_t max_fill;
        bool need_backup;
        bool need_backupctx;
index 99a4f225c25312c067f046f954ba4bf27e8b7938..c74ece58d1dee98ecab9e24aa94c12d247e292f0 100644 (file)
 #include <limits>
-#include <set>
-#include <vector>
 
 #include "src/ir/dfa/dfa.h"
 
 namespace re2c
 {
 
-static void calc_live(
-       const dfa_t &dfa,
-       const std::set<size_t> &fallback,
-       std::vector<bool> &visited,
-       std::vector<std::set<size_t> > &live,
-       size_t i)
+static size_t mask_dead_tags(Tagpool &tagpool,
+       size_t oldidx, const bool *live)
 {
-       if (!visited[i]) {
-               visited[i] = true;
-
-               dfa_state_t *s = dfa.states[i];
-
-               bool fallthru = false;
-               for (size_t c = 0; c < dfa.nchars; ++c) {
-                       const size_t j = s->arcs[c];
-                       if (j != dfa_t::NIL) {
-                               calc_live(dfa, fallback, visited, live, j);
-                               live[i].insert(live[j].begin(), live[j].end());
-                       } else {
-                               fallthru = true;
-                       }
-               }
+       const bool *oldtags = tagpool[oldidx];
+       bool *newtags = new bool[tagpool.ntags];
+       for (size_t i = 0; i < tagpool.ntags; ++i) {
+               newtags[i] = oldtags[i] & live[i];
+       }
+       const size_t newidx = tagpool.insert(newtags);
+       delete[] newtags;
+       return newidx;
+}
 
-               if (s->rule != Rule::NONE) {
-                       const Rule &rule = dfa.rules[s->rule];
-                       for (size_t j = rule.ltag; j < rule.htag; ++j) {
-                               live[i].insert(j);
-                       }
-                       if (rule.trail.type == Trail::VAR) {
-                               live[i].insert(rule.trail.pld.var);
-                       }
-               } else if (fallthru) {
-                       // transition to default state: all fallback rules
-                       // are potentialy reachable, their contexts are alive
-                       live[i].insert(fallback.begin(), fallback.end());
+static bool dangling_arcs(const size_t *arcs, size_t narcs)
+{
+       for (size_t i = 0; i < narcs; ++i) {
+               if (arcs[i] == dfa_t::NIL) {
+                       return true;
                }
        }
+       return false;
 }
 
-size_t deduplicate_contexts(dfa_t &dfa,
-       const std::vector<size_t> &fallback)
+/* note [liveness analyses on tags]
+ *
+ * Tag T is alive in state S if either:
+ *
+ *    - There is a transition from S to default state, S does not set T,
+ *      S is final and T belongs to tag set associated with rule in S.
+ *
+ *    - There is a transition from S to default state, S does not set T
+ *      and T belongs to any tag set associated with fallback rules.
+ *
+ *    - There is a transition from S to some state S' (maybe equal to S),
+ *      S does not set T and T is alive in S'.
+ */
+static void calc_live(const dfa_t &dfa,
+       const bool *fallback,
+       bool *visited,
+       bool *live,
+       size_t i)
 {
-       const size_t nctxs = dfa.contexts.size();
-       if (nctxs < 2) {
-               return nctxs;
+       if (visited[i]) {
+               return;
        }
 
-       std::set<size_t> fbctxs;
-       for (size_t i = 0; i < fallback.size(); ++i) {
-               const Rule &rule = dfa.rules[dfa.states[fallback[i]]->rule];
-               for (size_t j = rule.ltag; j < rule.htag; ++j) {
-                       fbctxs.insert(j);
+       visited[i] = true;
+       dfa_state_t *s = dfa.states[i];
+       const size_t ntags = dfa.contexts.size();
+
+       // add tags before recursing to child states,
+       // so that tags propagate into loopbacks to this state
+       if (dangling_arcs(s->arcs, dfa.nchars)) {
+               if (s->rule != Rule::NONE) {
+                       // final state, only rule tags are alive
+                       add_tags_with_mask(&live[i * ntags],
+                               dfa.rules[s->rule].tags,
+                               dfa.tagpool[s->tags],
+                               ntags);
+               } else {
+                       // transition to default state and dispatch on
+                       // 'yyaccept': all fallback rules are potentially
+                       // reachable, their tags are alive
+                       // no mask (no rule implies no rule tags)
+                       add_tags(&live[i * ntags], fallback, ntags);
                }
-               if (rule.trail.type == Trail::VAR) {
-                       fbctxs.insert(rule.trail.pld.var);
+       }
+
+       for (size_t c = 0; c < dfa.nchars; ++c) {
+               const size_t j = s->arcs[c];
+               if (j != dfa_t::NIL) {
+                       calc_live(dfa, fallback, visited, live, j);
+                       add_tags_with_mask(&live[i * ntags],
+                               &live[j * ntags],
+                               dfa.tagpool[s->tags],
+                               ntags);
                }
        }
+}
 
+static void mask_dead(dfa_t &dfa,
+       const bool *livetags)
+{
        const size_t nstates = dfa.states.size();
-
-       std::vector<bool> visited(nstates);
-       std::vector<std::set<size_t> > live(nstates);
-       calc_live(dfa, fbctxs, visited, live, 0);
-
-       std::vector<size_t> buffer(nctxs);
-       std::vector<bool> xxx(nctxs * nctxs);
+       const size_t ntags = dfa.contexts.size();
        for (size_t i = 0; i < nstates; ++i) {
-               const std::set<size_t>
-                       &ctxs = dfa.states[i]->ctxs,
-                       &liv = live[i];
-               std::vector<size_t>::iterator
-                       diff = buffer.begin(),
-                       diff_end = std::set_difference(liv.begin(), liv.end(),
-                               ctxs.begin(), ctxs.end(), diff);
-               for (; diff != diff_end; ++diff) {
-                       for (std::set<size_t>::const_iterator j = ctxs.begin(); j != ctxs.end(); ++j) {
-                               xxx[*diff * nctxs + *j] = xxx[*j * nctxs + *diff] = true;
+               dfa_state_t *s = dfa.states[i];
+               mask_dead_tags(dfa.tagpool, s->tags, &livetags[i * ntags]);
+       }
+}
+
+// tags that are updated here are pairwise incompatible
+// with all tags that are alive, but not updated here
+static void incompatible(bool *tbl,
+       const bool *live,
+       const bool *tags,
+       size_t ntags)
+{
+       for (size_t i = 0; i < ntags; ++i) {
+               if (live[i] && !tags[i]) {
+                       for (size_t j = 0; j < ntags; ++j) {
+                               if (tags[j]) {
+                                       tbl[i * ntags + j] = tbl[j * ntags + i] = true;
+                               }
                        }
                }
        }
+}
 
-       // We have a binary relation on the set of all contexts
-       // and must construct set decomposition into subsets such that
-       // all contexts in the same subset are equivalent.
-       //
-       // This problem is isomorphic to partitioning graph into cliques
-       // (aka finding the 'clique cover' of a graph).
-       //
-       // Finding minimal clique cover in arbitrary graph is NP-complete.
-       // We build just some cover (not necessarily minimal).
-       // The algorithm takes quadratic (in the number of contexts) time.
+static void incompatibility_table(const dfa_t &dfa,
+       const bool *livetags,
+       bool *incompattbl)
+{
+       const size_t nstates = dfa.states.size();
+       const size_t ntags = dfa.contexts.size();
+       for (size_t i = 0; i < nstates; ++i) {
+               const dfa_state_t *s = dfa.states[i];
+               incompatible(incompattbl, &livetags[i * ntags],
+                       dfa.tagpool[s->tags], ntags);
+       }
+}
 
+/* We have a binary relation on the set of all tags
+ * and must construct set decomposition into subsets such that
+ * all contexts in the same subset are equivalent.
+ *
+ * This problem is isomorphic to partitioning graph into cliques
+ * (aka finding the 'clique cover' of a graph).
+ *
+ * Finding minimal clique cover in arbitrary graph is NP-complete.
+ * We build just some cover (not necessarily minimal).
+ * The algorithm takes quadratic (in the number of tags) time.
+ * static void equivalence_classes(const std::vector<bool> &incompattbl,
+ */
+static size_t equivalence_classes(const bool *incompattbl,
+       size_t ntags, std::vector<size_t> &represent)
+{
        static const size_t END = std::numeric_limits<size_t>::max();
        std::vector<size_t>
-               part(nctxs, 0),   // partition: mapping of context to its representative
-               head(nctxs, END), // list of representatives
-               next(nctxs, END); // list of contexts mapped to the same representative
-       // skip the 1st context, it maps to itself
-       for (size_t c = 1; c < nctxs; ++c) {
+               head(ntags, END), // list of representatives
+               next(ntags, END); // list of tags mapped to the same representative
+
+       // skip the 1st tag, it maps to itself
+       for (size_t c = 1; c < ntags; ++c) {
                size_t h;
                for (h = 0; h != END; h = head[h]) {
                        size_t n;
                        for (n = h; n != END; n = next[n]) {
-                               if (xxx[c * nctxs + n]) {
+                               if (incompattbl[c * ntags + n]) {
                                        break;
                                }
                        }
                        if (n == END) {
-                               part[c] = h;
+                               represent[c] = h;
                                next[c] = next[h];
                                next[h] = c;
                                break;
                        }
                }
                if (h == END) {
-                       part[c] = c;
+                       represent[c] = c;
                        head[c] = head[0];
                        head[0] = c;
                }
        }
-       size_t ncontexts = 0;
-       for (size_t i = 0; i < nctxs; ++i) {
-               if (part[i] == i) {
-                       ++ncontexts;
+
+       size_t nreps = 0;
+       for (size_t i = 0; i < ntags; ++i) {
+               if (represent[i] == i) {
+                       ++nreps;
                }
        }
+       return nreps;
+}
 
-       for (size_t i = 0; i < nstates; ++i) {
-               dfa_state_t *s = dfa.states[i];
-               std::set<size_t> ctxs;
-               for (std::set<size_t>::const_iterator j = s->ctxs.begin(); j != s->ctxs.end(); ++j) {
-                       ctxs.insert(part[*j]);
+static size_t patch_tagset(Tagpool &tagpool, size_t oldidx,
+       const std::vector<size_t> &represent)
+{
+       const bool *oldtags = tagpool[oldidx];
+       bool *newtags = new bool[tagpool.ntags]();
+       for (size_t i = 0; i < tagpool.ntags; ++i) {
+               if (oldtags[i]) {
+                       newtags[represent[i]] = true;
                }
-               s->ctxs.swap(ctxs);
        }
-       for (size_t i = 0; i < nctxs; ++i) {
-               dfa.contexts[i].uniqname = dfa.contexts[part[i]].uniqname;
+       const size_t newidx = tagpool.insert(newtags);
+       delete[] newtags;
+       return newidx;
+}
+
+static void patch_tags(dfa_t &dfa, const std::vector<size_t> &represent)
+{
+       const size_t nstates = dfa.states.size();
+       for (size_t i = 0; i < nstates; ++i) {
+               dfa_state_t *s = dfa.states[i];
+               s->tags = patch_tagset(dfa.tagpool, s->tags, represent);
        }
 
-       return ncontexts;
+       const size_t ntags = dfa.contexts.size();
+       for (size_t i = 0; i < ntags; ++i) {
+               dfa.contexts[i].uniqname = dfa.contexts[represent[i]].uniqname;
+       }
 }
 
+size_t deduplicate_contexts(dfa_t &dfa,
+       const std::vector<size_t> &fallback)
+{
+       const size_t ntags = dfa.contexts.size();
+       if (ntags == 0) {
+               return 0;
+       }
+
+       bool *fbctxs = new bool[ntags]();
+       for (size_t i = 0; i < fallback.size(); ++i) {
+               const size_t r = dfa.states[fallback[i]]->rule;
+               add_tags(fbctxs, dfa.rules[r].tags, ntags);
+       }
+
+       const size_t nstates = dfa.states.size();
+       bool *visited = new bool[nstates]();
+       bool *live = new bool[nstates * ntags]();
+       calc_live(dfa, fbctxs, visited, live, 0);
+
+       mask_dead(dfa, live);
+
+       bool *incompattbl = new bool[ntags * ntags]();
+       incompatibility_table(dfa, live, incompattbl);
 
+       std::vector<size_t> represent(ntags, 0);
+       const size_t nreps = equivalence_classes(incompattbl, ntags, represent);
 
+       if (nreps < ntags) {
+               patch_tags(dfa, represent);
+       }
+
+       delete[] fbctxs;
+       delete[] visited;
+       delete[] live;
+       delete[] incompattbl;
+
+       return nreps;
+}
 
 } // namespace re2c
 
index 1b80cd2f8e6978dca3c44ede9bc5e18a07f56032..a8c242fe81d364d047e9a5ff5822d4bedd76b7b3 100644 (file)
@@ -96,21 +96,25 @@ dfa_t::dfa_t(
        , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
        , rules(nfa.rules)
        , contexts(nfa.contexts)
+       , tagpool(*new Tagpool(contexts.size()))
 {
+       const size_t ntags = contexts.size();
        const size_t nrules = rules.size();
 
        ord_hash_set_t kernels;
        nfa_state_t **const buffer = new nfa_state_t*[nfa.size];
        std::vector<std::vector<nfa_state_t*> > arcs(nchars);
        bool *fin = new bool[nrules];
+       bool *tags = new bool[ntags];
 
        find_state(buffer, closure(buffer, nfa.root), kernels);
        for (size_t i = 0; i < kernels.size(); ++i)
        {
-               dfa_state_t *s = new dfa_state_t;
+               dfa_state_t *s = new dfa_state_t(nchars);
                states.push_back(s);
 
                memset(fin, 0, nrules * sizeof(bool));
+               memset(tags, 0, ntags * sizeof(bool));
 
                nfa_state_t **kernel;
                const size_t kernel_size = kernels.deref<nfa_state_t*>(i, kernel);
@@ -134,7 +138,7 @@ dfa_t::dfa_t(
                                        break;
                                }
                                case nfa_state_t::CTX:
-                                       s->ctxs.insert(n->value.ctx.info);
+                                       tags[n->value.ctx.info] = true;
                                        break;
                                case nfa_state_t::FIN:
                                        fin[n->value.fin.rule] = true;
@@ -144,7 +148,6 @@ dfa_t::dfa_t(
                        }
                }
 
-               s->arcs = new size_t[nchars];
                for(size_t c = 0; c < nchars; ++c)
                {
                        nfa_state_t **end = buffer;
@@ -154,6 +157,7 @@ dfa_t::dfa_t(
                        }
                        s->arcs[c] = find_state(buffer, end, kernels);
                }
+               s->tags = tagpool.insert(tags);
 
                // choose the first rule (the one with smallest rank)
                size_t r;
@@ -177,6 +181,7 @@ dfa_t::dfa_t(
        }
        delete[] buffer;
        delete[] fin;
+       delete[] tags;
 
        check_context_selfoverlap(kernels, contexts, line, cond);
 }
index 2f1e720c729a152863cbd22c411447c73ef531f4..1bea33fe936a31b6f0826d0e677a090cf4d475dd 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "src/ir/regexp/regexp.h"
 #include "src/ir/rule.h"
+#include "src/ir/tagpool.h"
 #include "src/util/forbid_copy.h"
 #include "src/util/ord_hash_set.h"
 
@@ -20,12 +21,12 @@ struct dfa_state_t
 {
        size_t *arcs;
        size_t rule;
-       std::set<size_t> ctxs;
+       size_t tags;
 
-       dfa_state_t()
-               : arcs(NULL)
+       explicit dfa_state_t(size_t nchars)
+               : arcs(new size_t[nchars])
                , rule(Rule::NONE)
-               , ctxs()
+               , tags(0)
        {}
        ~dfa_state_t()
        {
@@ -43,6 +44,7 @@ struct dfa_t
        const size_t nchars;
        std::valarray<Rule> &rules;
        std::vector<CtxVar> &contexts;
+       Tagpool &tagpool;
 
        dfa_t(const nfa_t &nfa, const charset_t &charset,
                uint32_t line, const std::string &cond);
index d00e13f59416717791567570e86f6f4d7ccec341..33ada5bb768e359b3a0994b6ef2cb75952af4e9b 100644 (file)
@@ -45,7 +45,7 @@ static void minimization_table(
                for (size_t j = 0; j < i; ++j)
                {
                        dfa_state_t *s2 = states[j];
-                       tbl[i][j] = s1->ctxs != s2->ctxs
+                       tbl[i][j] = s1->tags != s2->tags
                                || s1->rule != s2->rule;
                }
        }
@@ -133,11 +133,11 @@ static void minimization_moore(
        size_t *next = new size_t[count];
 
        // see note [distinguish states by contexts]
-       std::map<std::pair<size_t, std::set<size_t> >, size_t> init;
+       std::map<std::pair<size_t, size_t>, size_t> init;
        for (size_t i = 0; i < count; ++i)
        {
                dfa_state_t *s = states[i];
-               std::pair<size_t, std::set<size_t> > key(s->rule, s->ctxs);
+               std::pair<size_t, size_t> key(s->rule, s->tags);
                if (init.insert(std::make_pair(key, i)).second)
                {
                        part[i] = i;
index 33f802e8180ecb167d22b4d48f766b9097fb609f..acf450d2e07352a80743e5d540bf289936c8d1f0 100644 (file)
@@ -220,8 +220,13 @@ nfa_t::nfa_t(const std::vector<const RegExpRule*> &rs)
        , rules(*new std::valarray<Rule>(rs.size()))
        , contexts(*new std::vector<CtxVar>)
        , root(compile_rules(rs, *this))
-{}
-
+{
+       const size_t nrules = rules.size();
+       const size_t ntags = contexts.size();
+       for (size_t i = 0; i < nrules; ++i) {
+               init_tags(rules[i], ntags);
+       }
+}
 
 nfa_t::~nfa_t()
 {
index 6b55421d60828cbabe678b329d2458934bf4e613..6d962fe88edd6266860ea44e1b846c0131406894 100644 (file)
@@ -7,4 +7,16 @@ namespace re2c
 
 const size_t Rule::NONE = std::numeric_limits<size_t>::max();
 
+void init_tags(Rule &rule, size_t ntags)
+{
+       bool *tags = new bool[ntags]();
+       for (size_t t = rule.ltag; t < rule.htag; ++t) {
+               tags[t] = true;
+       }
+       if (rule.trail.type == Trail::VAR) {
+               tags[rule.trail.pld.var] = true;
+       }
+       rule.tags = tags;
+}
+
 } // namespace re2c
index f5500533142f3397197dcea302b9b27267485244..9b6a461a520b204bbb5affec533817656eb2267e 100644 (file)
@@ -40,7 +40,7 @@ struct Rule
        std::vector<CtxFix> ctxfix;
        Trail trail;
        bool nullable;
-
+       bool *tags;
        std::set<uint32_t> shadow;
        bool reachable;
 
@@ -51,13 +51,20 @@ struct Rule
                , ctxfix()
                , trail()
                , nullable(false)
+               , tags(NULL)
                , shadow()
                , reachable(false)
        {}
+       ~Rule()
+       {
+               delete[] tags;
+       }
 
        FORBID_COPY(Rule);
 };
 
+void init_tags(Rule &rule, size_t ntags);
+
 } // namespace re2c
 
 #endif // _RE2C_IR_RULE_
index 4ff00ce68daa6e73d719b16a74062d9e5b6131d6..89185afbe5a3ccfd22461e30f3a2df3e20d08a04 100644 (file)
@@ -59,8 +59,7 @@ public:
                                case Trail::VAR: {
                                        const size_t ctx = trail.pld.var;
                                        for (; tail != head; ++tail) {
-                                               std::set<size_t> &ctxs = skel.nodes[*tail].ctxs;
-                                               if (ctxs.find(ctx) != ctxs.end()) {
+                                               if (skel.nodes[*tail].tags[ctx]) {
                                                        return static_cast<size_t>(head - tail) - 1;
                                                }
                                        }
index 42cdaf4aa658dc380dad8d811dd9d4e2bdd247e1..8b00b3b488df868b37d35b0a8e6a5e164acd8d48 100644 (file)
@@ -7,20 +7,18 @@
 namespace re2c
 {
 
-Node::Node() :
-       arcs(),
-       arcsets(),
-       rule(Rule::NONE),
-       ctxs()
+Node::Node()
+       : arcs()
+       , arcsets()
+       , rule(Rule::NONE)
+       , tags(NULL)
 {}
 
-void Node::init(
-       const std::set<size_t> &cs,
-       size_t r,
+void Node::init(const bool *ts, size_t r,
        const std::vector<std::pair<size_t, uint32_t> > &a)
 {
        rule = r;
-       ctxs = cs;
+       tags = ts;
 
        uint32_t lb = 0;
        std::vector<std::pair<size_t, uint32_t> >::const_iterator
@@ -86,7 +84,7 @@ Skeleton::Skeleton(
                if (arcs.size() == 1 && arcs[0].first == nodes_count - 1) {
                        arcs.clear();
                }
-               nodes[i].init(s->ctxs, s->rule, arcs);
+               nodes[i].init(dfa.tagpool[s->tags], s->rule, arcs);
        }
 
        // initialize size of key
index 456d646aa91dbbd0efbb77a666014502448b5f36..7f7561d4646e468fd563022d728249cb9c300779 100644 (file)
@@ -36,10 +36,10 @@ struct Node
        arcs_t arcs;
        arcsets_t arcsets;
        size_t rule;
-       std::set<size_t> ctxs;
+       const bool *tags;
 
        Node();
-       void init(const std::set<size_t> &cs, size_t r,
+       void init(const bool *ts, size_t r,
                const std::vector<std::pair<size_t, uint32_t> > &arcs);
        bool end() const;
 
diff --git a/re2c/src/ir/tagpool.h b/re2c/src/ir/tagpool.h
new file mode 100644 (file)
index 0000000..c7e2aa7
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef _RE2C_IR_TAGPOOL_
+#define _RE2C_IR_TAGPOOL_
+
+#include "src/util/ord_hash_set.h"
+
+namespace re2c
+{
+
+struct Tagpool
+{
+       const size_t ntags;
+
+private:
+       ord_hash_set_t pool;
+
+public:
+       explicit inline Tagpool(size_t n);
+       inline size_t insert(const bool *tags);
+       inline const bool *operator[](size_t idx);
+};
+
+Tagpool::Tagpool(size_t n)
+       : ntags(n)
+       , pool()
+{
+       const bool *zerotags = new bool[ntags]();
+       insert(zerotags);
+       delete[] zerotags;
+}
+
+size_t Tagpool::insert(const bool *tags)
+{
+       return pool.insert(tags, ntags * sizeof(bool));
+}
+
+const bool *Tagpool::operator[](size_t idx)
+{
+       const bool *tags;
+       pool.deref<const bool>(idx, tags);
+       return tags;
+}
+
+inline void add_tags(bool *oldtags, const bool *newtags, size_t ntags)
+{
+       for (size_t i = 0; i < ntags; ++i) {
+               oldtags[i] |= newtags[i];
+       }
+}
+
+inline void add_tags_with_mask(bool *oldtags, const bool *newtags,
+       const bool *mask, size_t ntags)
+{
+       for (size_t i = 0; i < ntags; ++i) {
+               oldtags[i] |= newtags[i] & ~mask[i];
+       }
+}
+
+} // namespace re2c
+
+#endif // _RE2C_IR_TAGPOOL_
index e599b115a324da50781acc963cc0f801c7b057b4..b3492e835b658f2805b68c25144ef08c29611bfd 100644 (file)
@@ -2,9 +2,6 @@
 
 {
        YYCTYPE yych;
-       long yyctx0;
-       long yyctx2;
-       YYBACKUPCTX ();
        if (YYLESSTHAN (3)) YYFILL(3);
        yych = YYPEEK ();
        switch (yych) {
@@ -19,7 +16,7 @@ yy4:
        YYSKIP ();
        YYBACKUP ();
        yych = YYPEEK ();
-       yyctx0 = YYDIST();
+       YYBACKUPCTX ();
        switch (yych) {
        case 'a':       goto yy5;
        case 'b':       goto yy7;
@@ -29,7 +26,7 @@ yy4:
 yy5:
        YYSKIP ();
        yych = YYPEEK ();
-       yyctx2 = YYDIST();
+       YYBACKUPCTX ();
        switch (yych) {
        case 'z':       goto yy13;
        default:        goto yy6;
@@ -46,7 +43,7 @@ yy7:
        default:        goto yy9;
        }
 yy9:
-       YYRESTORECTX (yyctx0);
+       YYRESTORECTX ();
        {}
 yy10:
        YYSKIP ();
@@ -57,7 +54,7 @@ yy10:
        default:        goto yy12;
        }
 yy12:
-       YYRESTORECTX (yyctx0);
+       YYRESTORECTX ();
        {}
 yy13:
        YYSKIP ();
@@ -68,7 +65,7 @@ yy13:
        default:        goto yy15;
        }
 yy15:
-       YYRESTORECTX (yyctx2);
+       YYRESTORECTX ();
        {}
 }
 
index d6cdb5c28bac905db211df8e3e7f929182ff1170..6d015264c4bacd884945f4edf2f3d477bbea0834 100644 (file)
@@ -2,9 +2,6 @@
 
 {
        YYCTYPE yych;
-       long yyctx0;
-       long yyctx2;
-       YYCTXMARKER = YYCURSOR;
        if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
        yych = *YYCURSOR;
        switch (yych) {
@@ -17,7 +14,7 @@ yy3:
        {}
 yy4:
        yych = *(YYMARKER = ++YYCURSOR);
-       yyctx0 = (YYCURSOR - YYCTXMARKER);
+       YYCTXMARKER = YYCURSOR;
        switch (yych) {
        case 'a':       goto yy5;
        case 'b':       goto yy7;
@@ -26,7 +23,7 @@ yy4:
        }
 yy5:
        yych = *++YYCURSOR;
-       yyctx2 = (YYCURSOR - YYCTXMARKER);
+       YYCTXMARKER = YYCURSOR;
        switch (yych) {
        case 'z':       goto yy13;
        default:        goto yy6;
@@ -43,7 +40,7 @@ yy7:
        default:        goto yy9;
        }
 yy9:
-       YYCURSOR = YYCTXMARKER + yyctx0;
+       YYCURSOR = YYCTXMARKER;
        {}
 yy10:
        ++YYCURSOR;
@@ -54,7 +51,7 @@ yy10:
        default:        goto yy12;
        }
 yy12:
-       YYCURSOR = YYCTXMARKER + yyctx0;
+       YYCURSOR = YYCTXMARKER;
        {}
 yy13:
        ++YYCURSOR;
@@ -65,7 +62,7 @@ yy13:
        default:        goto yy15;
        }
 yy15:
-       YYCURSOR = YYCTXMARKER + yyctx2;
+       YYCURSOR = YYCTXMARKER;
        {}
 }
 
index 6cdc0cb71a83ed8995bac34563e4e6c115d7a203..1cffd83ddf49797d29e82b5fe1e4ea6cb0cf7d5d 100644 (file)
@@ -2,9 +2,6 @@
 
 {
        YYCTYPE yych;
-       long yyctx0;
-       long yyctx2;
-       YYBACKUPCTX ();
        if (YYLESSTHAN (3)) YYFILL(3);
        yych = YYPEEK ();
        switch (yych) {
        }
 yy2:
        YYSKIP ();
-       yyctx0 = YYDIST();
+       YYBACKUPCTX ();
        yych = YYPEEK ();
        goto yy6;
-       YYRESTORECTX (yyctx0);
+       YYRESTORECTX ();
        {}
 yy4:
        YYSKIP ();
-       yyctx0 = YYDIST();
+       YYBACKUPCTX ();
        switch ((yych = YYPEEK ())) {
        case 'a':       goto yy10;
        default:        goto yy8;
        }
-       YYRESTORECTX (yyctx0);
+       YYRESTORECTX ();
        {}
 yy6:
        YYSKIP ();
@@ -39,7 +36,7 @@ yy8:
        goto yy8;
 yy10:
        YYSKIP ();
-       yyctx2 = YYDIST();
+       YYBACKUPCTX ();
        yych = YYPEEK ();
        goto yy8;
 }
index 09518e22548d07dad6c8b2ff1e42154eb5bfdebc..b594ceab70d81c1d8c8b9683d8768d81a54db2df 100644 (file)
@@ -2,9 +2,6 @@
 
 {
        YYCTYPE yych;
-       long yyctx0;
-       long yyctx2;
-       YYCTXMARKER = YYCURSOR;
        if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
        yych = *YYCURSOR;
        switch (yych) {
        }
 yy2:
        ++YYCURSOR;
-       yyctx0 = (YYCURSOR - YYCTXMARKER);
+       YYCTXMARKER = YYCURSOR;
        yych = *YYCURSOR;
        goto yy6;
-       YYCURSOR = YYCTXMARKER + yyctx0;
+       YYCURSOR = YYCTXMARKER;
        {}
 yy4:
        ++YYCURSOR;
-       yyctx0 = (YYCURSOR - YYCTXMARKER);
+       YYCTXMARKER = YYCURSOR;
        switch ((yych = *YYCURSOR)) {
        case 'a':       goto yy10;
        default:        goto yy8;
        }
-       YYCURSOR = YYCTXMARKER + yyctx0;
+       YYCURSOR = YYCTXMARKER;
        {}
 yy6:
        ++YYCURSOR;
@@ -39,7 +36,7 @@ yy8:
        goto yy8;
 yy10:
        ++YYCURSOR;
-       yyctx2 = (YYCURSOR - YYCTXMARKER);
+       YYCTXMARKER = YYCURSOR;
        yych = *YYCURSOR;
        goto yy8;
 }