]> granicus.if.org Git - re2c/commitdiff
Use different datatypes for closures and kernels.
author Ulya Trofimovich <skvadrik@gmail.com>
Sat, 19 Nov 2016 23:01:44 +0000 (23:01 +0000)
committer Ulya Trofimovich <skvadrik@gmail.com>
Sat, 26 Nov 2016 18:02:44 +0000 (18:02 +0000)
This is a preliminary step for tracking tag versions in closures / kernels.

For now closures and kernels store the same information, but they will
diverge when we start tracking tag versions: closure items will need
some extra data that is needed during closure construction, but shouldn't
be in the kernel.

Kernel representation should also allow efficient comparison for identity
or compatibility (for mapping).

re2c/src/ir/dfa/closure.cc
re2c/src/ir/dfa/closure.h
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/find_state.cc
re2c/src/ir/dfa/find_state.h

index 63b2a36cadbc143efcea34929782234bf40144be..2ef786aeab532dc6a147b2a5e1b9c0ab9a630c61 100644 (file)
@@ -10,6 +10,7 @@ static void closure_one(closure_t &clos, Tagpool &tagpool, nfa_state_t *n, tagve
 static void check_tags(const Tagpool &tagpool, size_t oldidx, size_t newidx, bool *badtags);
 static bool compare_by_rule(const clos_t &c1, const clos_t &c2);
 static void prune_final_items(closure_t &clos, std::valarray<Rule> &rules);
+static bool not_fin(const clos_t &c);
 static tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, const std::valarray<Rule> &rules, bool *badtags);
 
 tagsave_t *closure(const closure_t &clos1, closure_t &clos2,
@@ -156,7 +157,7 @@ void prune_final_items(closure_t &clos, std::valarray<Rule> &rules)
        clositer_t
                b = clos.begin(),
                e = clos.end(),
-               f = std::partition(b, e, clos_t::not_final);
+               f = std::partition(b, e, not_fin);
        if (f != e) {
                std::partial_sort(f, f, e, compare_by_rule);
                // mark all rules except the first one as shadowed
@@ -169,6 +170,11 @@ void prune_final_items(closure_t &clos, std::valarray<Rule> &rules)
        }
 }
 
+bool not_fin(const clos_t &c)
+{
+       return c.state->type != nfa_state_t::FIN;
+}
+
 // WARNING: this function assumes that closure items are grouped by rule
 tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool,
        tcpool_t &tcpool, const std::valarray<Rule> &rules, bool *badtags)
index 423613da6d239300c38eb26e54eb85fb5971dd21..d954bb45b7c5c8f8760533017da6e1a0d797d827 100644 (file)
@@ -16,8 +16,6 @@ struct clos_t
 
        inline clos_t();
        inline clos_t(nfa_state_t *s, size_t i);
-       static inline bool final(const clos_t &c);
-       static inline bool not_final(const clos_t &c);
 };
 
 typedef std::vector<clos_t> closure_t;
@@ -38,16 +36,6 @@ clos_t::clos_t(nfa_state_t *s, size_t i)
        , tagidx(i)
 {}
 
-bool clos_t::final(const clos_t &c)
-{
-       return c.state->type == nfa_state_t::FIN;
-}
-
-bool clos_t::not_final(const clos_t &c)
-{
-       return !clos_t::final(c);
-}
-
 } // namespace re2c
 
 #endif // _RE2C_IR_DFA_CLOSURE_
index a145ca157b799f41664e304639bc8e4993450b2a..6f81c4e11704ceeb13c751306406c786f6c08b22 100644 (file)
@@ -1,4 +1,3 @@
-#include <algorithm>
 #include <limits>
 #include <vector>
 
@@ -16,7 +15,7 @@ namespace re2c
 
 static tagver_t vartag_maxver(const std::valarray<Tag> &tags);
 static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol);
-static void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol);
+static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol);
 static void warn_bad_tags(const bool *badtags, const std::valarray<Tag> &tags,
        const std::valarray<Rule> &rules, const std::string &cond);
 
@@ -35,15 +34,13 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
        return NULL;
 }
 
-void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol)
+void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
 {
-       clos2.clear();
-       for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
-               nfa_state_t
-                       *s1 = c->state,
-                       *s2 = transition(s1, symbol);
-               if (s2) {
-                       clos2.push_back(clos_t(s2, c->tagidx));
+       clos.clear();
+       for (size_t i = 0; i < kernel->size; ++i) {
+               nfa_state_t *s = transition(kernel->state[i], symbol);
+               if (s) {
+                       clos.push_back(clos_t(s, kernel->tlook[i]));
                }
        }
 }
@@ -60,18 +57,18 @@ dfa_t::dfa_t(const nfa_t &nfa,
 {
        const size_t ntag = tags.size();
        Tagpool tagpool(ntag);
-       clospool_t clospool;
+       kernels_t kernels;
        closure_t clos1, clos2;
        bool *badtags = new bool[ntag]();
 
        maxtagver = vartag_maxver(tags);
        clos1.push_back(clos_t(nfa.root, ZERO_TAGS));
        closure(clos1, clos2, tagpool, tcpool, rules, badtags);
-       clospool.insert(clos2);
+       kernels.insert(clos2);
 
-       // closures are in sync with DFA states
-       for (size_t i = 0; i < clospool.size(); ++i) {
-               const closure_t &clos0 = clospool[i];
+       // closure kernels are in sync with DFA states
+       for (size_t i = 0; i < kernels.size(); ++i) {
+               const kernel_t *kernel = kernels[i];
 
                // create new DFA state
                dfa_state_t *s = new dfa_state_t(nchars);
@@ -79,20 +76,22 @@ dfa_t::dfa_t(const nfa_t &nfa,
 
                // check if the new state is final
                // see note [at most one final item per closure]
-               cclositer_t e = clos0.end(),
-                       f = std::find_if(clos0.begin(), e, clos_t::final);
-               if (f != e) {
-                       s->rule = f->state->rule;
-                       s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[f->tagidx], rules[s->rule].tags, ntag);
+               for (size_t i = 0; i < kernel->size; ++i) {
+                       const nfa_state_t *f = kernel->state[i];
+                       if (f->type == nfa_state_t::FIN) {
+                               s->rule = f->rule;
+                               s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], rules[s->rule].tags, ntag);
+                               break;
+                       }
                }
 
                // for each alphabet symbol, build tagged epsilon-closure
                // of all NFA states reachable on that symbol, then try to
                // find identical closure or add the new one
                for (size_t c = 0; c < nchars; ++c) {
-                       reach(clos0, clos1, charset[c]);
+                       reach(kernel, clos1, charset[c]);
                        s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, badtags);
-                       s->arcs[c] = clospool.insert(clos2);
+                       s->arcs[c] = kernels.insert(clos2);
                }
        }
 
index fe7621560d318436696cbda5c3a549b319688d82..3bfd878f7da30d130a80ab57376e7a3d29a9916e 100644 (file)
@@ -6,71 +6,96 @@
 namespace re2c
 {
 
-static uint32_t hashclos(const closure_t &clos);
-static bool eqclos(const closure_t *clos1, const closure_t *clos2);
+kernel_t::kernel_t(size_t n)
+       : size(n)
+       , state(new nfa_state_t*[size])
+       , tlook(new size_t[size])
+{}
 
-uint32_t hashclos(const closure_t &clos)
+kernel_t *kernel_t::copy(const kernel_t &k)
 {
-       uint32_t h = static_cast<uint32_t>(clos.size()); // seed
-       for (cclositer_t c = clos.begin(); c != clos.end(); ++c) {
-               h = hash32(h, &c->state, sizeof(c->state));
-               h = hash32(h, &c->tagidx, sizeof(c->tagidx));
-       }
-       return h;
+       const size_t n = k.size;
+       kernel_t *kcopy = new kernel_t(n);
+       memcpy(kcopy->state, k.state, n * sizeof(void*));
+       memcpy(kcopy->tlook, k.tlook, n * sizeof(size_t));
+       return kcopy;
 }
 
-bool eqclos(const closure_t *clos1, const closure_t *clos2)
+kernel_t::~kernel_t()
 {
-       if (clos1->size() != clos2->size()) {
-               return false;
-       }
-       for (cclositer_t c1 = clos1->begin(), c2 = clos2->begin();
-               c1 != clos1->end(); ++c1, ++c2) {
-               if (c1->state != c2->state
-                       || c1->tagidx != c2->tagidx) {
-                       return false;
-               }
-       }
-       return true;
+       delete[] state;
+       delete[] tlook;
 }
 
-clospool_t::clospool_t(): lookup() {}
+struct kernel_eq_t
+{
+       bool operator()(const kernel_t *x, const kernel_t *y) const
+       {
+               return x->size == y->size
+                       && memcmp(x->state, y->state, x->size * sizeof(void*)) == 0
+                       && memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0;
+       }
+};
+
+kernels_t::kernels_t()
+       : lookup()
+       , maxsize(256) // usually ranges from one to some twenty
+       , buffer(new kernel_t(maxsize))
+{}
 
-clospool_t::~clospool_t()
+kernels_t::~kernels_t()
 {
+       delete buffer;
+
        const size_t n = lookup.size();
        for (size_t i = 0; i < n; ++i) {
                delete lookup[i];
        }
 }
 
-size_t clospool_t::size() const
+size_t kernels_t::size() const
 {
        return lookup.size();
 }
 
-const closure_t& clospool_t::operator[](size_t idx) const
+const kernel_t *kernels_t::operator[](size_t idx) const
 {
-       return *lookup[idx];
+       return lookup[idx];
 }
 
-size_t clospool_t::insert(const closure_t &clos)
+size_t kernels_t::insert(const closure_t &clos)
 {
+       const size_t nkern = clos.size();
+
        // empty closure corresponds to default state
-       if (clos.empty()) {
-               return dfa_t::NIL;
-       }
+       if (nkern == 0) return dfa_t::NIL;
 
-       const uint32_t hash = hashclos(clos);
+       // resize buffer if closure is too large
+       if (maxsize < nkern) {
+               maxsize = nkern * 2; // in advance
+               delete buffer;
+               buffer = new kernel_t(maxsize);
+       }
 
-       // try to find an identical DFA state
-       size_t idx = lookup.find_with(hash, &clos, eqclos);
-       if (idx != closlookup_t::NIL) {
-               return idx;
+       // copy closure to buffer kernel
+       buffer->size = nkern;
+       for (size_t i = 0; i < nkern; ++i) {
+               const clos_t &c = clos[i];
+               buffer->state[i] = c.state;
+               buffer->tlook[i] = c.tagidx;
        }
 
-       // otherwise add a new state
-       return lookup.push(hash, new closure_t(clos));
+       // get kernel hash
+       uint32_t hash = static_cast<uint32_t>(nkern); // seed
+       hash = hash32(hash, buffer->state, nkern * sizeof(void*));
+       hash = hash32(hash, buffer->tlook, nkern * sizeof(size_t));
+
+       // try to find identical kernel
+       size_t idx = lookup.find_with(hash, buffer, kernel_eq_t());
+       if (idx != index_t::NIL) return idx;
+
+       // otherwise add new kernel
+       return lookup.push(hash, kernel_t::copy(*buffer));
 }
 
 } // namespace re2c
index a68061ee2400caab0d10ca99fd49194fe27f9950..931f9e98e39e2ff54bc6322f601e944b907a8435 100644 (file)
@@ -2,25 +2,40 @@
 #define _RE2C_IR_DFA_FIND_STATE_
 
 #include "src/ir/dfa/closure.h"
+#include "src/util/forbid_copy.h"
 #include "src/util/lookup.h"
 
 namespace re2c
 {
 
-struct Tagpool;
+struct kernel_t
+{
+       size_t size;
+       nfa_state_t **state;
+       size_t *tlook;
+
+       explicit kernel_t(size_t n);
+       ~kernel_t();
+       static kernel_t *copy(const kernel_t &k);
+       FORBID_COPY(kernel_t);
+};
 
-struct clospool_t
+struct kernels_t
 {
 private:
-       typedef lookup_t<const closure_t*> closlookup_t;
-       closlookup_t lookup;
+       typedef lookup_t<const kernel_t*> index_t;
+
+       index_t lookup;
+       size_t maxsize;
+       kernel_t *buffer;
 
 public:
-       clospool_t();
-       ~clospool_t();
+       kernels_t();
+       ~kernels_t();
        size_t size() const;
-       const closure_t& operator[](size_t idx) const;
+       const kernel_t* operator[](size_t idx) const;
        size_t insert(const closure_t &clos);
+       FORBID_COPY(kernels_t);
 };
 
 } // namespace re2c