]> granicus.if.org Git - re2c/commitdiff
Use tagpool to store and manipulate intermediate tag sets.
authorUlya Trofimovich <skvadrik@gmail.com>
Tue, 17 May 2016 16:46:05 +0000 (17:46 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Tue, 17 May 2016 16:46:05 +0000 (17:46 +0100)
Tagpool now has a special buffer to accommodate the needs of
intermediate calculations involving tag sets. Tagpool is in charge
of these calculations now; it has various functions to merge,
mask and whatever one has to do with tag sets.

re2c/Makefile.am
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/tag_deduplication.cc
re2c/src/ir/nfa/init_rules.cc
re2c/src/ir/nfa/nfa.cc
re2c/src/ir/nfa/nfa.h
re2c/src/ir/rule.h
re2c/src/ir/skeleton/skeleton.cc
re2c/src/ir/skeleton/skeleton.h
re2c/src/ir/tagpool.cc [new file with mode: 0644]
re2c/src/ir/tagpool.h

index f4deee0e8d4a7a3303b45b4b40c61d1295148be8..3c1597a0bbed11536552f0102953b3aac6ee8ec1 100644 (file)
@@ -114,6 +114,7 @@ SRC = \
        src/ir/skeleton/skeleton.cc \
        src/ir/skeleton/unreachable_nullable.cc \
        src/ir/tag.cc \
+       src/ir/tagpool.cc \
        src/main.cc \
        src/parse/code.cc \
        src/parse/input.cc \
index ba9c85d49be853a1d521dd0ffb3eeea380a95fd2..7b748166c9303b1af8366af0c2d1066126fdf2dd 100644 (file)
@@ -137,7 +137,7 @@ dfa_t::dfa_t(const nfa_t &nfa,
        , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
        , rules(nfa.rules)
        , tags(*nfa.tags)
-       , tagpool(*new Tagpool(tags.size()))
+       , tagpool(*nfa.tagpool)
 {
        const size_t ntags = tags.size();
        const size_t nrules = rules.size();
@@ -178,7 +178,7 @@ dfa_t::dfa_t(const nfa_t &nfa,
                                                for (; charset[c] != r->lower(); ++c);
                                                for (; charset[c] != r->upper(); ++c) {
                                                        merge_tags_with_mask(&arctags[c * ntags], newtags,
-                                                               &mask[c * ntags], rules[m->rule].tags,
+                                                               &mask[c * ntags], tagpool[rules[m->rule].tags],
                                                                badtags, ntags);
                                                        arcs[c].push_back(m);
                                                }
@@ -187,7 +187,7 @@ dfa_t::dfa_t(const nfa_t &nfa,
                                }
                                case nfa_state_t::FIN:
                                        merge_tags_with_mask(&arctags[nchars * ntags], newtags,
-                                               &mask[nchars * ntags], rules[n->rule].tags,
+                                               &mask[nchars * ntags], tagpool[rules[n->rule].tags],
                                                badtags, ntags);
                                        fin[n->rule] = true;
                                        break;
index 3ef91e068bf3a83b312abdf0f3021b17399ca521..bca0d22eff4e81f9be787098b18108d6511eef00 100644 (file)
@@ -5,19 +5,6 @@
 namespace re2c
 {
 
-static size_t mask_dead_tags(Tagpool &tagpool,
-       size_t oldidx, const bool *live)
-{
-       const bool *oldtags = tagpool[oldidx];
-       bool *newtags = new bool[tagpool.ntags];
-       for (size_t i = 0; i < tagpool.ntags; ++i) {
-               newtags[i] = oldtags[i] & live[i];
-       }
-       const size_t newidx = tagpool.insert(newtags);
-       delete[] newtags;
-       return newidx;
-}
-
 static bool dangling_arcs(const size_t *arcs, size_t narcs)
 {
        for (size_t i = 0; i < narcs; ++i) {
@@ -30,37 +17,38 @@ static bool dangling_arcs(const size_t *arcs, size_t narcs)
 
 /* note [liveness analyses on tags]
  *
- * Tag T is alive in state S if either:
+ * Tag T is alive in state S if there is a transition
+ * from S to some state S' that does not update T
+ * and either:
  *
- *    - There is a transition from S to default state that does not set T,
- *      S is final and T belongs to tag set associated with rule in S.
+ *    - S' is the default state
+ *      and S is final
+ *      and T belongs to S's final tag set
  *
- *    - There is a transition from S to default state that does not set T
- *      and T belongs to any tag set associated with fallback rules.
+ *    - S' is the default state
+ *      and S is not final
+ *      and T belongs to fallback tag set
  *
- *    - There is a transition from S to some state S' (maybe equal to S)
- *      that does not set T and T is alive in S'.
+ *    - S' is not the default state
+ *      and T is alive in S'
  */
-static void calc_live(const dfa_t &dfa, const bool *fallback, bool *live)
+static void calc_live(const dfa_t &dfa, size_t fallback, size_t *live)
 {
        const size_t nstates = dfa.states.size();
-       const size_t ntags = dfa.tags.size();
 
        for (size_t i = 0; i < nstates; ++i) {
                dfa_state_t *s = dfa.states[i];
                if (dangling_arcs(s->arcs, dfa.nchars)) {
                        if (s->rule != Rule::NONE) {
                                // final state, only rule tags are alive
-                               add_tags_with_mask(&live[i * ntags],
-                                       dfa.rules[s->rule].tags,
-                                       dfa.tagpool[s->rule_tags],
-                                       ntags);
+                               dfa.tagpool.orl_with_mask(&live[i],
+                                       dfa.rules[s->rule].tags, s->rule_tags);
                        } else {
                                // transition to default state and dispatch on
                                // 'yyaccept': all fallback rules are potentially
                                // reachable, their tags are alive
                                // no mask: no rule implies no rule tags
-                               add_tags(&live[i * ntags], fallback, ntags);
+                               dfa.tagpool.orl(&live[i], fallback);
                        }
                }
        }
@@ -72,33 +60,28 @@ static void calc_live(const dfa_t &dfa, const bool *fallback, bool *live)
                        for (size_t c = 0; c < dfa.nchars; ++c) {
                                const size_t j = s->arcs[c];
                                if (j != dfa_t::NIL) {
-                                       loop |= addcmp_tags_with_mask(&live[i * ntags],
-                                               &live[j * ntags],
-                                               dfa.tagpool[s->tags[c]],
-                                               ntags);
+                                       const size_t old = live[i];
+                                       dfa.tagpool.orl_with_mask(&live[i], live[j], s->tags[c]);
+                                       loop |= old != live[i];
                                }
                        }
                }
        }
 }
 
-static void mask_dead(dfa_t &dfa,
-       const bool *livetags)
+static void mask_dead(dfa_t &dfa, const size_t *live)
 {
        const size_t nstates = dfa.states.size();
-       const size_t ntags = dfa.tags.size();
        for (size_t i = 0; i < nstates; ++i) {
                dfa_state_t *s = dfa.states[i];
                for (size_t c = 0; c < dfa.nchars; ++c) {
                        const size_t j = s->arcs[c];
                        if (j != dfa_t::NIL) {
-                               s->tags[c] = mask_dead_tags(dfa.tagpool,
-                                       s->tags[c], &livetags[j * ntags]);
+                               dfa.tagpool.andl(&s->tags[c], live[j]);
                        }
                }
                if (s->rule != Rule::NONE) {
-                       s->rule_tags = mask_dead_tags(dfa.tagpool,
-                               s->rule_tags, dfa.rules[s->rule].tags);
+                       dfa.tagpool.andl(&s->rule_tags, dfa.rules[s->rule].tags);
                }
        }
 }
@@ -106,10 +89,11 @@ static void mask_dead(dfa_t &dfa,
 // tags that are updated here are pairwise incompatible
 // with all tags that are alive, but not updated here
 static void incompatible(bool *tbl,
-       const bool *live,
-       const bool *tags,
-       size_t ntags)
+       Tagpool &tagpool, size_t l, size_t t)
 {
+       const size_t ntags = tagpool.ntags;
+       const bool *live = tagpool[l];
+       const bool *tags = tagpool[t];
        for (size_t i = 0; i < ntags; ++i) {
                if (live[i] && !tags[i]) {
                        for (size_t j = 0; j < ntags; ++j) {
@@ -122,8 +106,7 @@ static void incompatible(bool *tbl,
 }
 
 static void incompatibility_table(const dfa_t &dfa,
-       const bool *livetags,
-       const bool *deftags,
+       const size_t *livetags, size_t deftags,
        bool *incompattbl)
 {
        const size_t nstates = dfa.states.size();
@@ -133,23 +116,17 @@ static void incompatibility_table(const dfa_t &dfa,
                for (size_t c = 0; c < dfa.nchars; ++c) {
                        const size_t j = s->arcs[c];
                        if (j != dfa_t::NIL) {
-                               incompatible(incompattbl,
-                                       &livetags[j * ntags],
-                                       dfa.tagpool[s->tags[c]],
-                                       ntags);
+                               incompatible(incompattbl, dfa.tagpool,
+                                       livetags[j], s->tags[c]);
                        }
                }
                if (dangling_arcs(s->arcs, dfa.nchars)) {
                        if (s->rule != Rule::NONE) {
-                               incompatible(incompattbl,
-                                       dfa.rules[s->rule].tags,
-                                       dfa.tagpool[s->rule_tags],
-                                       ntags);
+                               incompatible(incompattbl, dfa.tagpool,
+                                       dfa.rules[s->rule].tags, s->rule_tags);
                        } else {
-                               incompatible(incompattbl,
-                                       deftags,
-                                       dfa.tagpool[s->rule_tags],
-                                       ntags);
+                               incompatible(incompattbl, dfa.tagpool,
+                                       deftags, s->rule_tags);
                        }
                }
        }
@@ -177,7 +154,6 @@ static void incompatibility_table(const dfa_t &dfa,
  * Finding minimal clique cover in arbitrary graph is NP-complete.
  * We build just some cover (not necessarily minimal).
  * The algorithm takes quadratic (in the number of tags) time.
- * static void equivalence_classes(const std::vector<bool> &incompattbl,
  */
 static void equivalence_classes(const bool *incompattbl,
        size_t ntags, std::vector<size_t> &represent)
@@ -212,30 +188,15 @@ static void equivalence_classes(const bool *incompattbl,
        }
 }
 
-static size_t patch_tagset(Tagpool &tagpool, size_t oldidx,
-       const std::vector<size_t> &represent)
-{
-       const bool *oldtags = tagpool[oldidx];
-       bool *newtags = new bool[tagpool.ntags]();
-       for (size_t i = 0; i < tagpool.ntags; ++i) {
-               if (oldtags[i]) {
-                       newtags[represent[i]] = true;
-               }
-       }
-       const size_t newidx = tagpool.insert(newtags);
-       delete[] newtags;
-       return newidx;
-}
-
 static void patch_tags(dfa_t &dfa, const std::vector<size_t> &represent)
 {
        const size_t nstates = dfa.states.size();
        for (size_t i = 0; i < nstates; ++i) {
                dfa_state_t *s = dfa.states[i];
                for (size_t c = 0; c < dfa.nchars; ++c) {
-                       s->tags[c] = patch_tagset(dfa.tagpool, s->tags[c], represent);
+                       dfa.tagpool.subst(&s->tags[c], represent);
                }
-               s->rule_tags = patch_tagset(dfa.tagpool, s->rule_tags, represent);
+               dfa.tagpool.subst(&s->rule_tags, represent);
        }
 
        const size_t ntags = dfa.tags.size();
@@ -255,14 +216,14 @@ size_t deduplicate_tags(dfa_t &dfa,
                return 0;
        }
 
-       bool *fbtags = new bool[ntags]();
+       size_t fbtags = 0;
        for (size_t i = 0; i < fallback.size(); ++i) {
                const size_t r = dfa.states[fallback[i]]->rule;
-               add_tags(fbtags, dfa.rules[r].tags, ntags);
+               dfa.tagpool.orl(&fbtags, dfa.rules[r].tags);
        }
 
        const size_t nstates = dfa.states.size();
-       bool *live = new bool[nstates * ntags]();
+       size_t *live = new size_t[nstates]();
        calc_live(dfa, fbtags, live);
 
        mask_dead(dfa, live);
@@ -284,7 +245,6 @@ size_t deduplicate_tags(dfa_t &dfa,
                patch_tags(dfa, represent);
        }
 
-       delete[] fbtags;
        delete[] live;
        delete[] incompattbl;
 
index 921714c7e9b1454b1458d1ffa879e3d32812cd6d..dc3dc51247f672d5d4a542886cd4b2c97afc1416 100644 (file)
@@ -35,7 +35,8 @@ static void assert_tags_used_once(const Rule &rule,
 
 void init_rules(const std::vector<const RegExpRule*> &regexps,
        std::valarray<Rule> &rules,
-       const std::valarray<Tag> &tags)
+       const std::valarray<Tag> &tags,
+       Tagpool &tagpool)
 {
        const size_t nr = rules.size();
        const size_t nt = tags.size();
@@ -50,10 +51,12 @@ void init_rules(const std::vector<const RegExpRule*> &regexps,
                rule.htag = t;
 
                // mark *all* variable tags, including trailing context
-               rule.tags = new bool[nt]();
+               bool *mask = new bool[nt]();
                for (size_t i = rule.ltag; i < rule.htag; ++i) {
-                       rule.tags[i] = tags[i].type == Tag::VAR;
+                       mask[i] = tags[i].type == Tag::VAR;
                }
+               rule.tags = tagpool.insert(mask);
+               delete[] mask;
 
                // tags in trailing context are forbidden (they make no sense),
                // and since tags are constructed in reversed order, this implies
index b2dc638aea4160a3a9aaa49d1cc10093c8f0dc11..dc92245861f2c1a184ddfa2c5e435a14e7ebcb59 100644 (file)
@@ -8,18 +8,20 @@ nfa_t::nfa_t(const std::vector<const RegExpRule*> &regexps)
        , states(NULL)
        , rules(*new std::valarray<Rule>(regexps.size()))
        , tags(NULL)
+       , tagpool(NULL)
        , root(NULL)
 {
        size_t ntags = 0;
        max_size = counters(regexps, ntags);
 
+       tagpool = new Tagpool(ntags);
        tags = new std::valarray<Tag>(ntags);
        make_tags(regexps, *tags);
 
        states = new nfa_state_t[max_size];
        regexps2nfa(regexps, *this);
 
-       init_rules(regexps, rules, *tags);
+       init_rules(regexps, rules, *tags, *tagpool);
 }
 
 nfa_t::~nfa_t()
index b3e601876c561eab4f1cfb4bfe3ee2f57fc66ab0..5d4f24e44a67bcaf831d8ab41338637bbb1b12e4 100644 (file)
@@ -8,6 +8,7 @@
 
 #include "src/ir/regexp/regexp.h"
 #include "src/ir/rule.h"
+#include "src/ir/tagpool.h"
 #include "src/util/forbid_copy.h"
 
 namespace re2c
@@ -76,6 +77,7 @@ struct nfa_t
        nfa_state_t *states;
        std::valarray<Rule> &rules;
        std::valarray<Tag> *tags;
+       Tagpool *tagpool;
        nfa_state_t *root;
 
        nfa_t(const std::vector<const RegExpRule*> &rs);
@@ -90,7 +92,8 @@ void regexps2nfa(const std::vector<const RegExpRule*> &regexps, nfa_t &nfa);
 bool nullable_rule(const RegExpRule *rule);
 void init_rules(const std::vector<const RegExpRule*> &regexps,
        std::valarray<Rule> &rules,
-       const std::valarray<Tag> &tags);
+       const std::valarray<Tag> &tags,
+       Tagpool &tagpool);
 
 } // namespace re2c
 
index 0afadec85c52d9eb09b1dac507c9a75fd3395ce7..0cfc6bdb915bc8499eeee6556228434587c44761 100644 (file)
@@ -38,8 +38,8 @@ struct Rule
        size_t ltag;
        size_t htag;
        size_t trail;
+       size_t tags;
        bool nullable;
-       bool *tags;
        std::set<uint32_t> shadow;
        bool reachable;
 
@@ -48,15 +48,11 @@ struct Rule
                , ltag(0)
                , htag(0)
                , trail(Tag::NONE)
+               , tags(0)
                , nullable(false)
-               , tags(NULL)
                , shadow()
                , reachable(false)
        {}
-       ~Rule()
-       {
-               delete[] tags;
-       }
 
        FORBID_COPY(Rule);
 };
index b83b07b97d61d7e616344aa537d43a3166028556..00140e218761632e23b92faddcab148b0c536080 100644 (file)
@@ -44,11 +44,6 @@ void Node::init(const bool *ts, size_t r,
        }
 }
 
-Node::~Node()
-{
-       delete[] tags;
-}
-
 bool Node::end() const
 {
        return arcs.size() == 0;
@@ -72,7 +67,6 @@ Skeleton::Skeleton(
        , tags(dfa.tags)
 {
        const size_t nc = cs.size() - 1;
-       const size_t ntags = tags.size();
 
        // initialize skeleton nodes
        for (size_t i = 0; i < nodes_count - 1; ++i) {
@@ -93,13 +87,12 @@ Skeleton::Skeleton(
 
                // in skeleton we are only interested in trailing contexts
                // which may be attributed to states rather than transitions
-               bool *tags = new bool[ntags]();
-               add_tags(tags, dfa.tagpool[s->rule_tags], ntags);
+               size_t tags = s->rule_tags;
                for (size_t c = 0; c < nc; ++c) {
-                       add_tags(tags, dfa.tagpool[s->tags[c]], ntags);
+                       dfa.tagpool.orl(&tags, s->tags[c]);
                }
 
-               nodes[i].init(tags, s->rule, arcs);
+               nodes[i].init(dfa.tagpool[tags], s->rule, arcs);
        }
 
        // initialize size of key
index 1161011f6d88898a6b5355982585e7c524033652..7e54d372b261805dd6259c484e625cf7c8720fd1 100644 (file)
@@ -39,7 +39,6 @@ struct Node
        const bool *tags;
 
        Node();
-       ~Node();
        void init(const bool *ts, size_t r,
                const std::vector<std::pair<size_t, uint32_t> > &arcs);
        bool end() const;
diff --git a/re2c/src/ir/tagpool.cc b/re2c/src/ir/tagpool.cc
new file mode 100644 (file)
index 0000000..3253ea2
--- /dev/null
@@ -0,0 +1,97 @@
+#include "src/ir/tagpool.h"
+#include "src/util/forbid_copy.h"
+
+namespace re2c
+{
+
+Tagpool::Tagpool(size_t n)
+       : ntags(n)
+       , pool()
+       , buff(new bool[ntags]())
+{
+       // all-no tag set must have number 0
+       insert(buff);
+}
+
+Tagpool::~Tagpool()
+{
+       delete[] buff;
+}
+
+size_t Tagpool::insert(const bool *tags)
+{
+       return pool.insert(tags, ntags * sizeof(bool));
+}
+
+void Tagpool::orl(size_t *pt, size_t o)
+{
+       const size_t t = *pt;
+       if (t == o || o == 0) {
+               return;
+       } else if (t == 0) {
+               *pt = o;
+               return;
+       }
+
+       const bool *tags = operator[](t);
+       const bool *ortags = operator[](o);
+       for (size_t i = 0; i < ntags; ++i) {
+               buff[i] = tags[i] | ortags[i];
+       }
+       *pt = insert(buff);
+}
+
+void Tagpool::orl_with_mask(size_t *pt, size_t o, size_t m)
+{
+       const size_t t = *pt;
+       if (t == o || o == 0) {
+               return;
+       }
+
+       const bool *tags = operator[](t);
+       const bool *ortags = operator[](o);
+       const bool *masktags = operator[](m);
+       for (size_t i = 0; i < ntags; ++i) {
+               buff[i] = tags[i] | (ortags[i] & ~masktags[i]);
+       }
+       *pt = insert(buff);
+}
+
+void Tagpool::andl(size_t *pt, size_t a)
+{
+       const size_t t = *pt;
+       if (t == a) {
+               return;
+       } else if (t == 0 || a == 0) {
+               *pt = 0;
+               return;
+       }
+
+       const bool *tags = operator[](t);
+       const bool *andtags = operator[](a);
+       for (size_t i = 0; i < ntags; ++i) {
+               buff[i] = tags[i] & andtags[i];
+       }
+       *pt = insert(buff);
+}
+
+void Tagpool::subst(size_t *pt, const std::vector<size_t> &represent)
+{
+       const bool *tags = operator[](*pt);
+       memset(buff, 0, ntags * sizeof(bool));
+       for (size_t i = 0; i < ntags; ++i) {
+               if (tags[i]) {
+                       buff[represent[i]] = true;
+               }
+       }
+       *pt = insert(buff);
+}
+
+const bool *Tagpool::operator[](size_t idx)
+{
+       const bool *tags;
+       pool.deref<const bool>(idx, tags);
+       return tags;
+}
+
+} // namespace re2c
index d5a8dca1790aaa9d4d9f28c8c7fba6f3f4f40afb..9b97296562192a59860b55b84775507bafe8a725 100644 (file)
@@ -2,6 +2,7 @@
 #define _RE2C_IR_TAGPOOL_
 
 #include "src/util/ord_hash_set.h"
+#include "src/util/forbid_copy.h"
 
 namespace re2c
 {
@@ -12,61 +13,20 @@ struct Tagpool
 
 private:
        ord_hash_set_t pool;
+       bool *buff;
 
 public:
-       explicit inline Tagpool(size_t n);
-       inline size_t insert(const bool *tags);
-       inline const bool *operator[](size_t idx);
+       explicit Tagpool(size_t n);
+       ~Tagpool();
+       size_t insert(const bool *tags);
+       void orl(size_t *pt, size_t o);
+       void orl_with_mask(size_t *pt, size_t o, size_t m);
+       void andl(size_t *pt, size_t a);
+       void subst(size_t *pt, const std::vector<size_t> &represent);
+       const bool *operator[](size_t idx);
+       FORBID_COPY(Tagpool);
 };
 
-Tagpool::Tagpool(size_t n)
-       : ntags(n)
-       , pool()
-{
-       const bool *zerotags = new bool[ntags]();
-       insert(zerotags);
-       delete[] zerotags;
-}
-
-size_t Tagpool::insert(const bool *tags)
-{
-       return pool.insert(tags, ntags * sizeof(bool));
-}
-
-const bool *Tagpool::operator[](size_t idx)
-{
-       const bool *tags;
-       pool.deref<const bool>(idx, tags);
-       return tags;
-}
-
-inline void add_tags(bool *oldtags, const bool *newtags, size_t ntags)
-{
-       for (size_t i = 0; i < ntags; ++i) {
-               oldtags[i] |= newtags[i];
-       }
-}
-
-inline void add_tags_with_mask(bool *oldtags, const bool *newtags,
-       const bool *mask, size_t ntags)
-{
-       for (size_t i = 0; i < ntags; ++i) {
-               oldtags[i] |= newtags[i] & ~mask[i];
-       }
-}
-
-inline bool addcmp_tags_with_mask(bool *oldtags, const bool *newtags,
-       const bool *mask, size_t ntags)
-{
-       bool diff = false;
-       for (size_t i = 0; i < ntags; ++i) {
-               const bool old = oldtags[i];
-               oldtags[i] |= newtags[i] & ~mask[i];
-               diff |= old != oldtags[i];
-       }
-       return diff;
-}
-
 } // namespace re2c
 
 #endif // _RE2C_IR_TAGPOOL_